From 2a6baee3c86075e25a781a7995f41555df86c3e4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 18:48:54 +0200 Subject: [PATCH 01/32] Add typing to arrow using stubs --- .github/workflows/python.yml | 6 + dev/release/rat_exclude_files.txt | 1 + python/py.typed | 16 + python/pyproject.toml | 21 + python/stubs/LICENSE | 24 + python/stubs/__init__.pyi | 656 ++ python/stubs/__lib_pxi/__init__.pyi | 0 python/stubs/__lib_pxi/array.pyi | 4274 ++++++++++ python/stubs/__lib_pxi/benchmark.pyi | 1 + python/stubs/__lib_pxi/builder.pyi | 89 + python/stubs/__lib_pxi/compat.pyi | 5 + python/stubs/__lib_pxi/config.pyi | 41 + python/stubs/__lib_pxi/device.pyi | 88 + python/stubs/__lib_pxi/error.pyi | 53 + python/stubs/__lib_pxi/io.pyi | 1474 ++++ python/stubs/__lib_pxi/ipc.pyi | 705 ++ python/stubs/__lib_pxi/memory.pyi | 174 + python/stubs/__lib_pxi/pandas_shim.pyi | 51 + python/stubs/__lib_pxi/scalar.pyi | 1017 +++ python/stubs/__lib_pxi/table.pyi | 5609 +++++++++++++ python/stubs/__lib_pxi/tensor.pyi | 688 ++ python/stubs/__lib_pxi/types.pyi | 4413 ++++++++++ python/stubs/_azurefs.pyi | 74 + python/stubs/_compute.pyi | 1721 ++++ python/stubs/_csv.pyi | 641 ++ python/stubs/_cuda.pyi | 556 ++ python/stubs/_dataset.pyi | 2299 ++++++ python/stubs/_dataset_orc.pyi | 6 + python/stubs/_dataset_parquet.pyi | 314 + python/stubs/_dataset_parquet_encryption.pyi | 85 + python/stubs/_feather.pyi | 29 + python/stubs/_flight.pyi | 1380 ++++ python/stubs/_fs.pyi | 1005 +++ python/stubs/_gcsfs.pyi | 83 + python/stubs/_hdfs.pyi | 75 + python/stubs/_json.pyi | 169 + python/stubs/_orc.pyi | 56 + python/stubs/_parquet.pyi | 445 + python/stubs/_parquet_encryption.pyi | 67 + python/stubs/_s3fs.pyi | 74 + python/stubs/_stubs_typing.pyi | 80 + python/stubs/_substrait.pyi | 39 + python/stubs/acero.pyi | 85 + python/stubs/benchmark.pyi | 3 + python/stubs/cffi.pyi | 4 + python/stubs/compute.pyi | 7779 ++++++++++++++++++ python/stubs/csv.pyi | 27 + python/stubs/cuda.pyi | 25 + python/stubs/dataset.pyi | 229 + python/stubs/feather.pyi | 50 + python/stubs/flight.pyi | 95 + python/stubs/fs.pyi | 77 + python/stubs/gandiva.pyi | 65 + python/stubs/interchange/__init__.pyi | 0 python/stubs/interchange/buffer.pyi | 58 + python/stubs/interchange/column.pyi | 252 + python/stubs/interchange/dataframe.pyi | 102 + python/stubs/interchange/from_dataframe.pyi | 244 + python/stubs/ipc.pyi | 123 + python/stubs/json.pyi | 3 + python/stubs/lib.pyi | 106 + python/stubs/orc.pyi | 279 + python/stubs/pandas_compat.pyi | 54 + python/stubs/parquet/__init__.pyi | 1 + python/stubs/parquet/core.pyi | 2061 +++++ python/stubs/parquet/encryption.pyi | 15 + python/stubs/substrait.pyi | 21 + python/stubs/types.pyi | 194 + python/stubs/util.pyi | 27 + 69 files changed, 40583 insertions(+) create mode 100644 python/py.typed create mode 100644 python/stubs/LICENSE create mode 100644 python/stubs/__init__.pyi create mode 100644 python/stubs/__lib_pxi/__init__.pyi create mode 100644 python/stubs/__lib_pxi/array.pyi create mode 100644 python/stubs/__lib_pxi/benchmark.pyi create mode 100644 python/stubs/__lib_pxi/builder.pyi create mode 100644 python/stubs/__lib_pxi/compat.pyi create mode 100644 python/stubs/__lib_pxi/config.pyi create mode 100644 python/stubs/__lib_pxi/device.pyi create mode 100644 python/stubs/__lib_pxi/error.pyi create mode 100644 python/stubs/__lib_pxi/io.pyi create mode 100644 python/stubs/__lib_pxi/ipc.pyi create mode 100644 python/stubs/__lib_pxi/memory.pyi create mode 100644 python/stubs/__lib_pxi/pandas_shim.pyi create mode 100644 
python/stubs/__lib_pxi/scalar.pyi create mode 100644 python/stubs/__lib_pxi/table.pyi create mode 100644 python/stubs/__lib_pxi/tensor.pyi create mode 100644 python/stubs/__lib_pxi/types.pyi create mode 100644 python/stubs/_azurefs.pyi create mode 100644 python/stubs/_compute.pyi create mode 100644 python/stubs/_csv.pyi create mode 100644 python/stubs/_cuda.pyi create mode 100644 python/stubs/_dataset.pyi create mode 100644 python/stubs/_dataset_orc.pyi create mode 100644 python/stubs/_dataset_parquet.pyi create mode 100644 python/stubs/_dataset_parquet_encryption.pyi create mode 100644 python/stubs/_feather.pyi create mode 100644 python/stubs/_flight.pyi create mode 100644 python/stubs/_fs.pyi create mode 100644 python/stubs/_gcsfs.pyi create mode 100644 python/stubs/_hdfs.pyi create mode 100644 python/stubs/_json.pyi create mode 100644 python/stubs/_orc.pyi create mode 100644 python/stubs/_parquet.pyi create mode 100644 python/stubs/_parquet_encryption.pyi create mode 100644 python/stubs/_s3fs.pyi create mode 100644 python/stubs/_stubs_typing.pyi create mode 100644 python/stubs/_substrait.pyi create mode 100644 python/stubs/acero.pyi create mode 100644 python/stubs/benchmark.pyi create mode 100644 python/stubs/cffi.pyi create mode 100644 python/stubs/compute.pyi create mode 100644 python/stubs/csv.pyi create mode 100644 python/stubs/cuda.pyi create mode 100644 python/stubs/dataset.pyi create mode 100644 python/stubs/feather.pyi create mode 100644 python/stubs/flight.pyi create mode 100644 python/stubs/fs.pyi create mode 100644 python/stubs/gandiva.pyi create mode 100644 python/stubs/interchange/__init__.pyi create mode 100644 python/stubs/interchange/buffer.pyi create mode 100644 python/stubs/interchange/column.pyi create mode 100644 python/stubs/interchange/dataframe.pyi create mode 100644 python/stubs/interchange/from_dataframe.pyi create mode 100644 python/stubs/ipc.pyi create mode 100644 python/stubs/json.pyi create mode 100644 python/stubs/lib.pyi create mode 100644 python/stubs/orc.pyi create mode 100644 python/stubs/pandas_compat.pyi create mode 100644 python/stubs/parquet/__init__.pyi create mode 100644 python/stubs/parquet/core.pyi create mode 100644 python/stubs/parquet/encryption.pyi create mode 100644 python/stubs/substrait.pyi create mode 100644 python/stubs/types.pyi create mode 100644 python/stubs/util.pyi diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 15dfa11fc4c..15906a10ac0 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -138,6 +138,12 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} + - name: Type check with ty + working-directory: python + run: |- + python -m pip install ty + python -m ty check + macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 runs-on: macos-${{ matrix.macos-version }} diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 51c01516e7c..c4dc26e7784 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -95,6 +95,7 @@ python/pyarrow/tests/__init__.py python/pyarrow/vendored/* python/pyarrow/src/arrow/python/vendored/* python/requirements*.txt +python/stubs/* pax_global_header MANIFEST.in __init__.pxd diff --git a/python/py.typed b/python/py.typed new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/python/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyproject.toml b/python/pyproject.toml index 113d0b16f19..5c0580a0510 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -97,3 +97,24 @@ version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' fallback_version = '22.0.0a0' + +[tool.ty.rules] +invalid-argument-type = "ignore" +invalid-assignment = "ignore" +invalid-context-manager = "ignore" +invalid-return-type = "ignore" +invalid-type-form = "ignore" +no-matching-overload = "ignore" +non-subscriptable = "ignore" +not-iterable = "ignore" +possibly-unbound-attribute = "ignore" +possibly-unbound-import = "ignore" +too-many-positional-arguments = "ignore" +unknown-argument = "ignore" +unresolved-attribute = "ignore" +unresolved-global = "ignore" +unresolved-import = "ignore" +unresolved-reference = "ignore" +unsupported-operator = "ignore" +missing-argument = "ignore" +call-non-callable = "ignore" diff --git a/python/stubs/LICENSE b/python/stubs/LICENSE new file mode 100644 index 00000000000..6d8e2aff5b7 --- /dev/null +++ b/python/stubs/LICENSE @@ -0,0 +1,24 @@ +BSD 2-Clause License + +Copyright (c) 2024, ZhengYu, Xu + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
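As a quick illustration of what this patch is meant to enable (not part of the patch itself): once the py.typed marker and the stub files below are installed alongside pyarrow, a type checker such as ty should resolve calls like pa.array(...) to the concrete Array subclasses declared in python/stubs/__lib_pxi/array.pyi. The snippet below is a minimal, hypothetical sketch; the variable names are illustrative and the annotations simply restate what the overloads shown later in this patch declare.

import pyarrow as pa

# Inferred from the NullableCollection[int] overload of pa.array in the stubs
ints: pa.Int64Array = pa.array([1, 2, 3])
# Inferred from the NullableCollection[str] overload
strs: pa.StringArray = pa.array(["a", "b"])
# Inferred from the Float32Type overload when an explicit type is passed
floats: pa.FloatArray = pa.array([1.5, 2.5], type=pa.float32())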
diff --git a/python/stubs/__init__.pyi b/python/stubs/__init__.pyi new file mode 100644 index 00000000000..8a0d1e870c5 --- /dev/null +++ b/python/stubs/__init__.pyi @@ -0,0 +1,656 @@ +# ruff: noqa: F401, I001, E402 +__version__: str + +import pyarrow.lib as _lib + +_gc_enabled: bool + +from pyarrow.lib import ( + BuildInfo, + RuntimeInfo, + set_timezone_db_path, + MonthDayNano, + VersionInfo, + cpp_build_info, + cpp_version, + cpp_version_info, + runtime_info, + cpu_count, + set_cpu_count, + enable_signal_handlers, + io_thread_count, + set_io_thread_count, +) + +def show_versions() -> None: ... +def show_info() -> None: ... +def _module_is_available(module: str) -> bool: ... +def _filesystem_is_available(fs: str) -> bool: ... + +from pyarrow.lib import ( + null, + bool_, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + time32, + time64, + timestamp, + date32, + date64, + duration, + month_day_nano_interval, + float16, + float32, + float64, + binary, + string, + utf8, + binary_view, + string_view, + large_binary, + large_string, + large_utf8, + decimal32, + decimal64, + decimal128, + decimal256, + list_, + large_list, + list_view, + large_list_view, + map_, + struct, + union, + sparse_union, + dense_union, + dictionary, + run_end_encoded, + json_, + uuid, + fixed_shape_tensor, + bool8, + opaque, + field, + type_for_alias, + DataType, + DictionaryType, + StructType, + ListType, + LargeListType, + FixedSizeListType, + ListViewType, + LargeListViewType, + MapType, + UnionType, + SparseUnionType, + DenseUnionType, + TimestampType, + Time32Type, + Time64Type, + DurationType, + FixedSizeBinaryType, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + BaseExtensionType, + ExtensionType, + RunEndEncodedType, + FixedShapeTensorType, + Bool8Type, + UuidType, + JsonType, + OpaqueType, + PyExtensionType, + UnknownExtensionType, + register_extension_type, + unregister_extension_type, + DictionaryMemo, + KeyValueMetadata, + Field, + Schema, + schema, + unify_schemas, + Array, + Tensor, + array, + chunked_array, + record_batch, + nulls, + repeat, + SparseCOOTensor, + SparseCSRMatrix, + SparseCSCMatrix, + SparseCSFTensor, + infer_type, + from_numpy_dtype, + NullArray, + NumericArray, + IntegerArray, + FloatingPointArray, + BooleanArray, + Int8Array, + UInt8Array, + Int16Array, + UInt16Array, + Int32Array, + UInt32Array, + Int64Array, + UInt64Array, + HalfFloatArray, + FloatArray, + DoubleArray, + ListArray, + LargeListArray, + FixedSizeListArray, + ListViewArray, + LargeListViewArray, + MapArray, + UnionArray, + BinaryArray, + StringArray, + LargeBinaryArray, + LargeStringArray, + BinaryViewArray, + StringViewArray, + FixedSizeBinaryArray, + DictionaryArray, + Date32Array, + Date64Array, + TimestampArray, + Time32Array, + Time64Array, + DurationArray, + MonthDayNanoIntervalArray, + Decimal32Array, + Decimal64Array, + Decimal128Array, + Decimal256Array, + StructArray, + ExtensionArray, + RunEndEncodedArray, + FixedShapeTensorArray, + Bool8Array, + UuidArray, + JsonArray, + OpaqueArray, + scalar, + NA, + _NULL as NULL, + Scalar, + NullScalar, + BooleanScalar, + Int8Scalar, + Int16Scalar, + Int32Scalar, + Int64Scalar, + UInt8Scalar, + UInt16Scalar, + UInt32Scalar, + UInt64Scalar, + HalfFloatScalar, + FloatScalar, + DoubleScalar, + Decimal32Scalar, + Decimal64Scalar, + Decimal128Scalar, + Decimal256Scalar, + ListScalar, + LargeListScalar, + FixedSizeListScalar, + ListViewScalar, + LargeListViewScalar, + Date32Scalar, + Date64Scalar, + Time32Scalar, + Time64Scalar, 
+ TimestampScalar, + DurationScalar, + MonthDayNanoIntervalScalar, + BinaryScalar, + LargeBinaryScalar, + BinaryViewScalar, + StringScalar, + LargeStringScalar, + StringViewScalar, + FixedSizeBinaryScalar, + DictionaryScalar, + MapScalar, + StructScalar, + UnionScalar, + RunEndEncodedScalar, + ExtensionScalar, + Bool8Scalar, + UuidScalar, + JsonScalar, + OpaqueScalar, +) + +# Buffers, allocation +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager + +from pyarrow.lib import ( + Buffer, + ResizableBuffer, + foreign_buffer, + py_buffer, + Codec, + compress, + decompress, + allocate_buffer, +) + +from pyarrow.lib import ( + MemoryPool, + LoggingMemoryPool, + ProxyMemoryPool, + total_allocated_bytes, + set_memory_pool, + default_memory_pool, + system_memory_pool, + jemalloc_memory_pool, + mimalloc_memory_pool, + logging_memory_pool, + proxy_memory_pool, + log_memory_allocations, + jemalloc_set_decay_ms, + supported_memory_backends, +) + +# I/O +from pyarrow.lib import ( + NativeFile, + PythonFile, + BufferedInputStream, + BufferedOutputStream, + CacheOptions, + CompressedInputStream, + CompressedOutputStream, + TransformInputStream, + transcoding_input_stream, + FixedSizeBufferWriter, + BufferReader, + BufferOutputStream, + OSFile, + MemoryMappedFile, + memory_map, + create_memory_map, + MockOutputStream, + input_stream, + output_stream, + have_libhdfs, +) + +from pyarrow.lib import ( + ChunkedArray, + RecordBatch, + Table, + table, + concat_arrays, + concat_tables, + TableGroupBy, + RecordBatchReader, +) + +# Exceptions +from pyarrow.lib import ( + ArrowCancelled, + ArrowCapacityError, + ArrowException, + ArrowKeyError, + ArrowIndexError, + ArrowInvalid, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError, + ArrowTypeError, + ArrowSerializationError, +) + +from pyarrow.ipc import serialize_pandas, deserialize_pandas +import pyarrow.ipc as ipc + +import pyarrow.types as types + +# ---------------------------------------------------------------------- +# Deprecations + +from pyarrow.util import _deprecate_api, _deprecate_class + +from pyarrow.ipc import ( + Message, + MessageReader, + MetadataVersion, + RecordBatchFileReader, + RecordBatchFileWriter, + RecordBatchStreamReader, + RecordBatchStreamWriter, +) + +# ---------------------------------------------------------------------- +# Returning absolute path to the pyarrow include directory (if bundled, e.g. in +# wheels) +def get_include() -> str: ... +def _get_pkg_config_executable() -> str: ... +def _has_pkg_config(pkgname: str) -> bool: ... +def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ... +def get_libraries() -> list[str]: ... +def create_library_symlinks() -> None: ... +def get_library_dirs() -> list[str]: ... 
+ +__all__ = [ + "__version__", + "_lib", + "_gc_enabled", + "BuildInfo", + "RuntimeInfo", + "set_timezone_db_path", + "MonthDayNano", + "VersionInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "cpu_count", + "set_cpu_count", + "enable_signal_handlers", + "io_thread_count", + "set_io_thread_count", + "show_versions", + "show_info", + "_module_is_available", + "_filesystem_is_available", + "null", + "bool_", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "time32", + "time64", + "timestamp", + "date32", + "date64", + "duration", + "month_day_nano_interval", + "float16", + "float32", + "float64", + "binary", + "string", + "utf8", + "binary_view", + "string_view", + "large_binary", + "large_string", + "large_utf8", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "struct", + "union", + "sparse_union", + "dense_union", + "dictionary", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "field", + "type_for_alias", + "DataType", + "DictionaryType", + "StructType", + "ListType", + "LargeListType", + "FixedSizeListType", + "ListViewType", + "LargeListViewType", + "MapType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "BaseExtensionType", + "ExtensionType", + "RunEndEncodedType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "PyExtensionType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "DictionaryMemo", + "KeyValueMetadata", + "Field", + "Schema", + "schema", + "unify_schemas", + "Array", + "Tensor", + "array", + "chunked_array", + "record_batch", + "nulls", + "repeat", + "SparseCOOTensor", + "SparseCSRMatrix", + "SparseCSCMatrix", + "SparseCSFTensor", + "infer_type", + "from_numpy_dtype", + "NullArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "BooleanArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "ListArray", + "LargeListArray", + "FixedSizeListArray", + "ListViewArray", + "LargeListViewArray", + "MapArray", + "UnionArray", + "BinaryArray", + "StringArray", + "LargeBinaryArray", + "LargeStringArray", + "BinaryViewArray", + "StringViewArray", + "FixedSizeBinaryArray", + "DictionaryArray", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "StructArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "RunEndEncodedArray", + "FixedShapeTensorArray", + "scalar", + "NA", + "NULL", + "Scalar", + "NullScalar", + "BooleanScalar", + "Int8Scalar", + "Int16Scalar", + "Int32Scalar", + "Int64Scalar", + "UInt8Scalar", + "UInt16Scalar", + "UInt32Scalar", + "UInt64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "ListScalar", + "LargeListScalar", + "FixedSizeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + 
"Time64Scalar", + "TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "BinaryViewScalar", + "StringScalar", + "LargeStringScalar", + "StringViewScalar", + "FixedSizeBinaryScalar", + "DictionaryScalar", + "MapScalar", + "StructScalar", + "UnionScalar", + "RunEndEncodedScalar", + "ExtensionScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "DeviceAllocationType", + "Device", + "MemoryManager", + "default_cpu_memory_manager", + "Buffer", + "ResizableBuffer", + "foreign_buffer", + "py_buffer", + "Codec", + "compress", + "decompress", + "allocate_buffer", + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "total_allocated_bytes", + "set_memory_pool", + "default_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "logging_memory_pool", + "proxy_memory_pool", + "log_memory_allocations", + "jemalloc_set_decay_ms", + "supported_memory_backends", + "NativeFile", + "PythonFile", + "BufferedInputStream", + "BufferedOutputStream", + "CacheOptions", + "CompressedInputStream", + "CompressedOutputStream", + "TransformInputStream", + "transcoding_input_stream", + "FixedSizeBufferWriter", + "BufferReader", + "BufferOutputStream", + "OSFile", + "MemoryMappedFile", + "memory_map", + "create_memory_map", + "MockOutputStream", + "input_stream", + "output_stream", + "have_libhdfs", + "ChunkedArray", + "RecordBatch", + "Table", + "table", + "concat_arrays", + "concat_tables", + "TableGroupBy", + "RecordBatchReader", + "ArrowCancelled", + "ArrowCapacityError", + "ArrowException", + "ArrowKeyError", + "ArrowIndexError", + "ArrowInvalid", + "ArrowIOError", + "ArrowMemoryError", + "ArrowNotImplementedError", + "ArrowTypeError", + "ArrowSerializationError", + "serialize_pandas", + "deserialize_pandas", + "ipc", + "types", + "_deprecate_api", + "_deprecate_class", + "Message", + "MessageReader", + "MetadataVersion", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "get_include", + "_get_pkg_config_executable", + "_has_pkg_config", + "_read_pkg_config_variable", + "get_libraries", + "create_library_symlinks", + "get_library_dirs", +] diff --git a/python/stubs/__lib_pxi/__init__.pyi b/python/stubs/__lib_pxi/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/stubs/__lib_pxi/array.pyi b/python/stubs/__lib_pxi/array.pyi new file mode 100644 index 00000000000..ec1cda30a88 --- /dev/null +++ b/python/stubs/__lib_pxi/array.pyi @@ -0,0 +1,4274 @@ +import datetime as dt +import sys + +from collections.abc import Callable +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + Any, + Generic, + Iterable, + Iterator, + Literal, + TypeVar, + overload, +) + +import numpy as np +import pandas as pd + +from pandas.core.dtypes.base import ExtensionDtype +from pyarrow._compute import CastOptions +from pyarrow._stubs_typing import ( + ArrayLike, + Indices, + Mask, + Order, + SupportArrowArray, + SupportArrowDeviceArray, +) +from pyarrow.lib import ( + Buffer, + Device, + MemoryManager, + MemoryPool, + MonthDayNano, + Tensor, + _Weakrefable, +) +from typing_extensions import deprecated + +from . 
import scalar, types +from .device import DeviceAllocationType +from .scalar import NullableCollection, Scalar +from .types import ( + DataType, + Field, + MapType, + _AsPyType, + _BasicDataType, + _BasicValueT, + _DataTypeT, + _IndexT, + _RunEndType, + _Size, +) + +@overload +def array( + values: NullableCollection[bool], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BooleanArray: ... +@overload +def array( + values: NullableCollection[int], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int64Array: ... +@overload +def array( + values: NullableCollection[float], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DoubleArray: ... +@overload +def array( + values: NullableCollection[Decimal], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Decimal128Array: ... +@overload +def array( + values: NullableCollection[dict[str, Any]], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def array( + values: NullableCollection[dt.date], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date32Array: ... +@overload +def array( + values: NullableCollection[dt.time], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["us"]]: ... +@overload +def array( + values: NullableCollection[dt.timedelta], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["us"]]: ... +@overload +def array( + values: NullableCollection[MonthDayNano], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def array( + values: NullableCollection[str], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def array( + values: NullableCollection[bytes], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def array( + values: NullableCollection[list[Any]], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> ListArray[Any]: ... 
+@overload +def array( + values: NullableCollection[_ScalarT], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Array[_ScalarT]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["null"] | types.NullType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> NullArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["bool", "boolean"] | types.BoolType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BooleanArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i1", "int8"] | types.Int8Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int8Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i2", "int16"] | types.Int16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int16Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i4", "int32"] | types.Int32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int32Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i8", "int64"] | types.Int64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int64Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u1", "uint8"] | types.UInt8Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt8Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u2", "uint16"] | types.UInt16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt16Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u4", "uint32"] | types.Uint32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt32Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u8", "uint64"] | types.UInt64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt64Array: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> HalfFloatArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f4", "float", "float32"] | types.Float32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> FloatArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f8", "double", "float64"] | types.Float64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DoubleArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string", "str", "utf8"] | types.StringType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary"] | types.BinaryType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_binary"] | types.LargeBinaryType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary_view"] | types.BinaryViewType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string_view"] | types.StringViewType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date32", "date32[day]"] | types.Date32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date32Array: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date64", "date64[ms]"] | types.Date64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date64Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time32Array[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time32Array[Literal["ms"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["ns"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["ms"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["ms"]]: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["ns"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: _DataTypeT, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Array[Scalar[_DataTypeT]]: ... +def array(*args, **kwargs): + """ + Create pyarrow.Array instance from a Python object. + + Parameters + ---------- + obj : sequence, iterable, ndarray, pandas.Series, Arrow-compatible array + If both type and size are specified may be a single use iterable. If + not strongly-typed, Arrow type will be inferred for resulting array. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_device_array__`` method) + can be passed as well. + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred from + the data. + mask : array[bool], optional + Indicate which values are null (True) or not null (False). + size : int64, optional + Size of the elements. If the input is larger than size bail at this + length. For iterators, if size is larger than the input iterator this + will be treated as a "max size", but will involve an initial allocation + of size followed by a resize to the actual size (so if you know the + exact size specifying it correctly will give you better performance). + from_pandas : bool, default None + Use pandas's semantics for inferring nulls from values in + ndarray-like data. If passed, the mask takes precedence, but + if a value is unmasked (not-null), but still null according to + pandas semantics, then it is null. Defaults to False if not + passed explicitly by user, or True if a pandas object is + passed in. + safe : bool, default True + Check for overflows or other unsafe conversions. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Returns + ------- + array : pyarrow.Array or pyarrow.ChunkedArray + A ChunkedArray instead of an Array is returned if: + + - the object data overflowed binary storage. + - the object's ``__arrow_array__`` protocol method returned a chunked + array. + + Notes + ----- + Timezone will be preserved in the returned array for timezone-aware data, + else no timezone will be returned for naive timestamps. + Internally, UTC values are stored for timezone-aware data with the + timezone set in the data type.
+ + Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by + default converted as MonthDayNanoIntervalArray. relativedelta leapdays + are ignored as are all absolute fields on both objects. datetime.timedelta + can also be converted to MonthDayNanoIntervalArray but this requires + passing MonthDayNanoIntervalType explicitly. + + Converting to dictionary array will promote to a wider integer type for + indices if the number of distinct values cannot be represented, even if + the index type was explicitly set. This means that if there are more than + 127 values the returned dictionary array's index type will be at least + pa.int16() even if pa.int8() was passed to the function. Note that an + explicit index type will not be demoted even if it is wider than required. + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> pa.array(pd.Series([1, 2])) + + [ + 1, + 2 + ] + + >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) + + ... + -- dictionary: + [ + "a", + "b" + ] + -- indices: + [ + 0, + 1, + 0 + ] + + >>> import numpy as np + >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) + + [ + 1, + null + ] + + >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) + >>> arr.type.index_type + DataType(int16) + """ + +@overload +def asarray(values: NullableCollection[bool]) -> BooleanArray: ... +@overload +def asarray(values: NullableCollection[int]) -> Int64Array: ... +@overload +def asarray(values: NullableCollection[float]) -> DoubleArray: ... +@overload +def asarray(values: NullableCollection[Decimal]) -> Decimal128Array: ... +@overload +def asarray(values: NullableCollection[dict[str, Any]]) -> StructArray: ... +@overload +def asarray(values: NullableCollection[dt.date]) -> Date32Array: ... +@overload +def asarray(values: NullableCollection[dt.time]) -> Time64Array: ... +@overload +def asarray(values: NullableCollection[dt.timedelta]) -> DurationArray: ... +@overload +def asarray(values: NullableCollection[MonthDayNano]) -> MonthDayNanoIntervalArray: ... +@overload +def asarray(values: NullableCollection[list[Any]]) -> ListArray[Any]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["null"] | types.NullType, +) -> NullArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["bool", "boolean"] | types.BoolType, +) -> BooleanArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i1", "int8"] | types.Int8Type, +) -> Int8Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i2", "int16"] | types.Int16Type, +) -> Int16Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i4", "int32"] | types.Int32Type, +) -> Int32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i8", "int64"] | types.Int64Type, +) -> Int64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u1", "uint8"] | types.UInt8Type, +) -> UInt8Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u2", "uint16"] | types.UInt16Type, +) -> UInt16Array: ... 
+@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u4", "uint32"] | types.Uint32Type, +) -> UInt32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u8", "uint64"] | types.UInt64Type, +) -> UInt64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, +) -> HalfFloatArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f4", "float", "float32"] | types.Float32Type, +) -> FloatArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f8", "double", "float64"] | types.Float64Type, +) -> DoubleArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string", "str", "utf8"] | types.StringType, +) -> StringArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary"] | types.BinaryType, +) -> BinaryArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, +) -> LargeStringArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_binary"] | types.LargeBinaryType, +) -> LargeBinaryArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary_view"] | types.BinaryViewType, +) -> BinaryViewArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string_view"] | types.StringViewType, +) -> StringViewArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date32", "date32[day]"] | types.Date32Type, +) -> Date32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date64", "date64[ms]"] | types.Date64Type, +) -> Date64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], +) -> Time32Array[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], +) -> Time32Array[Literal["ms"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], +) -> Time64Array[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], +) -> Time64Array[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], +) -> TimestampArray[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], +) -> TimestampArray[Literal["ms"]]: ... 
+@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], +) -> TimestampArray[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], +) -> TimestampArray[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], +) -> DurationArray[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], +) -> DurationArray[Literal["ms"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], +) -> DurationArray[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], +) -> DurationArray[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, +) -> MonthDayNanoIntervalArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: _DataTypeT, +) -> Array[Scalar[_DataTypeT]]: ... +def asarray(*args, **kwargs): + """ + Convert to pyarrow.Array, inferring type if not provided. + + Parameters + ---------- + values : array-like + This can be a sequence, numpy.ndarray, pyarrow.Array or + pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be + a ChunkedArray, otherwise the output will be a Array. + type : string or DataType + Explicitly construct the array with this type. Attempt to cast if + indicated type is different. + + Returns + ------- + arr : Array or ChunkedArray + """ + +@overload +def nulls(size: int, memory_pool: MemoryPool | None = None) -> NullArray: ... +@overload +def nulls( + size: int, type: types.NullType | None, memory_pool: MemoryPool | None = None +) -> NullArray: ... +@overload +def nulls( + size: int, type: types.BoolType, memory_pool: MemoryPool | None = None +) -> BooleanArray: ... +@overload +def nulls(size: int, type: types.Int8Type, memory_pool: MemoryPool | None = None) -> Int8Array: ... +@overload +def nulls( + size: int, type: types.Int16Type, memory_pool: MemoryPool | None = None +) -> Int16Array: ... +@overload +def nulls( + size: int, type: types.Int32Type, memory_pool: MemoryPool | None = None +) -> Int32Array: ... +@overload +def nulls( + size: int, type: types.Int64Type, memory_pool: MemoryPool | None = None +) -> Int64Array: ... +@overload +def nulls( + size: int, type: types.UInt8Type, memory_pool: MemoryPool | None = None +) -> UInt8Array: ... +@overload +def nulls( + size: int, type: types.UInt16Type, memory_pool: MemoryPool | None = None +) -> UInt16Array: ... +@overload +def nulls( + size: int, type: types.Uint32Type, memory_pool: MemoryPool | None = None +) -> UInt32Array: ... +@overload +def nulls( + size: int, type: types.UInt64Type, memory_pool: MemoryPool | None = None +) -> UInt64Array: ... +@overload +def nulls( + size: int, type: types.Float16Type, memory_pool: MemoryPool | None = None +) -> HalfFloatArray: ... 
+@overload +def nulls( + size: int, type: types.Float32Type, memory_pool: MemoryPool | None = None +) -> FloatArray: ... +@overload +def nulls( + size: int, type: types.Float64Type, memory_pool: MemoryPool | None = None +) -> DoubleArray: ... +@overload +def nulls( + size: int, type: types.Decimal32Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal64Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal128Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal256Type, memory_pool: MemoryPool | None = None +) -> Decimal256Array: ... +@overload +def nulls( + size: int, type: types.Date32Type, memory_pool: MemoryPool | None = None +) -> Date32Array: ... +@overload +def nulls( + size: int, type: types.Date64Type, memory_pool: MemoryPool | None = None +) -> Date64Array: ... +@overload +def nulls( + size: int, type: types.Time32Type[types._Time32Unit], memory_pool: MemoryPool | None = None +) -> Time32Array[types._Time32Unit]: ... +@overload +def nulls( + size: int, type: types.Time64Type[types._Time64Unit], memory_pool: MemoryPool | None = None +) -> Time64Array[types._Time64Unit]: ... +@overload +def nulls( + size: int, + type: types.TimestampType[types._Unit, types._Tz], + memory_pool: MemoryPool | None = None, +) -> TimestampArray[types._Unit, types._Tz]: ... +@overload +def nulls( + size: int, type: types.DurationType[types._Unit], memory_pool: MemoryPool | None = None +) -> DurationArray[types._Unit]: ... +@overload +def nulls( + size: int, type: types.MonthDayNanoIntervalType, memory_pool: MemoryPool | None = None +) -> MonthDayNanoIntervalArray: ... +@overload +def nulls( + size: int, + type: types.BinaryType, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def nulls( + size: int, + type: types.LargeBinaryType, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def nulls( + size: int, + type: types.FixedSizeBinaryType, + memory_pool: MemoryPool | None = None, +) -> FixedSizeBinaryArray: ... +@overload +def nulls( + size: int, + type: types.StringType, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def nulls( + size: int, + type: types.LargeStringType, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def nulls( + size: int, + type: types.BinaryViewType, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def nulls( + size: int, + type: types.StringViewType, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def nulls( + size: int, + type: types.LargeListType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> LargeListArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.ListViewType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> ListViewArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.LargeListViewType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> LargeListViewArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.FixedSizeListType[_DataTypeT, _Size], + memory_pool: MemoryPool | None = None, +) -> FixedSizeListArray[_DataTypeT, _Size]: ... +@overload +def nulls( + size: int, + type: types.ListType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... 
+@overload +def nulls( + size: int, + type: types.StructType, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def nulls( + size: int, + type: types.MapType[_MapKeyT, _MapItemT], + memory_pool: MemoryPool | None = None, +) -> MapArray[_MapKeyT, _MapItemT]: ... +@overload +def nulls( + size: int, + type: types.DictionaryType[_IndexT, _BasicValueT], + memory_pool: MemoryPool | None = None, +) -> DictionaryArray[_IndexT, _BasicValueT]: ... +@overload +def nulls( + size: int, + type: types.RunEndEncodedType[_RunEndType, _BasicValueT], + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... +@overload +def nulls( + size: int, + type: types.UnionType, + memory_pool: MemoryPool | None = None, +) -> UnionArray: ... +@overload +def nulls( + size: int, + type: types.FixedShapeTensorType[types._ValueT], + memory_pool: MemoryPool | None = None, +) -> FixedShapeTensorArray[Any]: ... +@overload +def nulls( + size: int, + type: types.Bool8Type, + memory_pool: MemoryPool | None = None, +) -> Bool8Array: ... +@overload +def nulls( + size: int, + type: types.UuidType, + memory_pool: MemoryPool | None = None, +) -> UuidArray[Any]: ... +@overload +def nulls( + size: int, + type: types.JsonType, + memory_pool: MemoryPool | None = None, +) -> JsonArray[Any]: ... +@overload +def nulls( + size: int, + type: types.OpaqueType, + memory_pool: MemoryPool | None = None, +) -> OpaqueArray[Any]: ... +@overload +def nulls( + size: int, + type: types.ExtensionType, + memory_pool: MemoryPool | None = None, +) -> ExtensionArray[Any]: ... +def nulls(*args, **kwargs): + """ + Create a strongly-typed Array instance with all elements null. + + Parameters + ---------- + size : int + Array length. + type : pyarrow.DataType, default None + Explicit type for the array. By default use NullType. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.nulls(10) + + 10 nulls + + >>> pa.nulls(3, pa.uint32()) + + [ + null, + null, + null + ] + """ + +@overload +def repeat( + value: None | scalar.NullScalar, size: int, memory_pool: MemoryPool | None = None +) -> NullArray: ... +@overload +def repeat( # type: ignore[overload-overlap] + value: bool | scalar.BooleanScalar, size: int, memory_pool: MemoryPool | None = None +) -> BooleanArray: ... +@overload +def repeat( + value: scalar.Int8Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int8Array: ... +@overload +def repeat( + value: scalar.Int16Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int16Array: ... +@overload +def repeat( + value: scalar.Int32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int32Array: ... +@overload +def repeat( + value: int | scalar.Int64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int64Array: ... +@overload +def repeat( + value: scalar.UInt8Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt8Array: ... +@overload +def repeat( + value: scalar.UInt16Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt16Array: ... +@overload +def repeat( + value: scalar.UInt32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt32Array: ... +@overload +def repeat( + value: scalar.UInt64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt64Array: ... 
+@overload +def repeat( + value: scalar.HalfFloatScalar, size: int, memory_pool: MemoryPool | None = None +) -> HalfFloatArray: ... +@overload +def repeat( + value: scalar.FloatScalar, size: int, memory_pool: MemoryPool | None = None +) -> FloatArray: ... +@overload +def repeat( + value: float | scalar.DoubleScalar, size: int, memory_pool: MemoryPool | None = None +) -> DoubleArray: ... +@overload +def repeat( + value: Decimal | scalar.Decimal32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal32Array: ... +@overload +def repeat( + value: scalar.Decimal64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal64Array: ... +@overload +def repeat( + value: scalar.Decimal128Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def repeat( + value: scalar.Decimal256Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal256Array: ... +@overload +def repeat( + value: dt.date | scalar.Date32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Date32Array: ... +@overload +def repeat( + value: scalar.Date64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Date64Array: ... +@overload +def repeat( + value: scalar.Time32Scalar[types._Time32Unit], size: int, memory_pool: MemoryPool | None = None +) -> Time32Array[types._Time32Unit]: ... +@overload +def repeat( + value: dt.time | scalar.Time64Scalar[types._Time64Unit], + size: int, + memory_pool: MemoryPool | None = None, +) -> Time64Array[types._Time64Unit]: ... +@overload +def repeat( + value: scalar.TimestampScalar[types._Unit, types._Tz], + size: int, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[types._Unit, types._Tz]: ... +@overload +def repeat( + value: dt.timedelta | scalar.DurationScalar[types._Unit], + size: int, + memory_pool: MemoryPool | None = None, +) -> DurationArray[types._Unit]: ... +@overload +def repeat( # pyright: ignore[reportOverlappingOverload] + value: MonthDayNano | scalar.MonthDayNanoIntervalScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def repeat( + value: bytes | scalar.BinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def repeat( + value: scalar.LargeBinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def repeat( + value: scalar.FixedSizeBinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedSizeBinaryArray: ... +@overload +def repeat( + value: str | scalar.StringScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def repeat( + value: scalar.LargeStringScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def repeat( + value: scalar.BinaryViewScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def repeat( + value: scalar.StringViewScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def repeat( + value: list[Any] | tuple[Any] | scalar.ListScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... +@overload +def repeat( + value: scalar.FixedSizeListScalar[_DataTypeT, _Size], + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedSizeListArray[_DataTypeT, _Size]: ... 
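# A minimal usage sketch for the overloads above that accept plain Python values:
# dates, strings and lists map to Date32Array, StringArray and ListArray respectively.
import datetime as dt
import pyarrow as pa

dates = pa.repeat(dt.date(2020, 1, 1), 2)  # dt.date -> Date32Array
names = pa.repeat("string", 3)             # str -> StringArray
pairs = pa.repeat([1, 2], 2)               # list -> ListArray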
+@overload +def repeat( + value: scalar.LargeListScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeListArray[_DataTypeT]: ... +@overload +def repeat( + value: scalar.ListViewScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> ListViewArray[_DataTypeT]: ... +@overload +def repeat( + value: scalar.LargeListViewScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeListViewArray[_DataTypeT]: ... +@overload +def repeat( + value: dict[str, Any] | scalar.StructScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def repeat( + value: scalar.MapScalar[_MapKeyT, _MapItemT], + size: int, + memory_pool: MemoryPool | None = None, +) -> MapArray[_MapKeyT, _MapItemT]: ... +@overload +def repeat( + value: scalar.DictionaryScalar[_IndexT, _BasicValueT], + size: int, + memory_pool: MemoryPool | None = None, +) -> DictionaryArray[_IndexT, _BasicValueT]: ... +@overload +def repeat( + value: scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT], + size: int, + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... +@overload +def repeat( + value: scalar.UnionScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> UnionArray: ... +@overload +def repeat( + value: scalar.FixedShapeTensorScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedShapeTensorArray[Any]: ... +@overload +def repeat( + value: scalar.Bool8Scalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> Bool8Array: ... +@overload +def repeat( + value: scalar.UuidScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> UuidArray[Any]: ... +@overload +def repeat( + value: scalar.JsonScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> JsonArray[Any]: ... +@overload +def repeat( + value: scalar.OpaqueScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> OpaqueArray[Any]: ... +@overload +def repeat( + value: scalar.ExtensionScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> ExtensionArray[Any]: ... +def repeat(*args, **kwargs): + """ + Create an Array instance whose slots are the given scalar. + + Parameters + ---------- + value : Scalar-like object + Either a pyarrow.Scalar or any python object coercible to a Scalar. + size : int + Number of times to repeat the scalar in the output Array. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.repeat(10, 3) + + [ + 10, + 10, + 10 + ] + + >>> pa.repeat([1, 2], 2) + + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + + >>> pa.repeat("string", 3) + + [ + "string", + "string", + "string" + ] + + >>> pa.repeat(pa.scalar({"a": 1, "b": [1, 2]}), 2) + + -- is_valid: all not null + -- child 0 type: int64 + [ + 1, + 1 + ] + -- child 1 type: list + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + """ + +def infer_type(values: Iterable[Any], mask: Mask, from_pandas: bool = False) -> DataType: + """ + Attempt to infer Arrow data type that can hold the passed Python + sequence type in an Array object + + Parameters + ---------- + values : array-like + Sequence to infer type from. + mask : ndarray (bool type), optional + Optional exclusion mask where True marks null, False non-null. + from_pandas : bool, default False + Use pandas's NA/null sentinel values for type inference. 
+ + Returns + ------- + type : DataType + """ + +class ArrayStatistics(_Weakrefable): + """ + The class for statistics of an array. + """ + @property + def null_count(self) -> int: + """ + The number of nulls. + """ + @property + def distinct_count(self) -> int: + """ + The number of distinct values. + """ + @property + def min(self) -> Any: + """ + The minimum value. + """ + @property + def is_min_exact(self) -> bool: + """ + Whether the minimum value is an exact value or not. + """ + @property + def max(self) -> Any: + """ + The maximum value. + """ + + @property + def is_max_exact(self) -> bool: + """ + Whether the maximum value is an exact value or not. + """ + +_ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series) + +class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): + def to_pandas( + self, + memory_pool: MemoryPool | None = None, + categories: list | None = None, + strings_to_categorical: bool = False, + zero_copy_only: bool = False, + integer_object_nulls: bool = False, + date_as_object: bool = True, + timestamp_as_object: bool = False, + use_threads: bool = True, + deduplicate_objects: bool = True, + ignore_metadata: bool = False, + safe: bool = True, + split_blocks: bool = False, + self_destruct: bool = False, + maps_as_pydicts: Literal["None", "lossy", "strict"] | None = None, + types_mapper: Callable[[DataType], ExtensionDtype | None] | None = None, + coerce_temporal_nanoseconds: bool = False, + ) -> _ConvertAs: + """ + Convert to a pandas-compatible NumPy array or DataFrame, as appropriate + + Parameters + ---------- + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + categories : list, default empty + List of fields that should be returned as pandas.Categorical. Only + applies to table-like data structures. + strings_to_categorical : bool, default False + Encode string (UTF8) and binary types to pandas.Categorical. + zero_copy_only : bool, default False + Raise an ArrowException if this function call would require copying + the underlying data. + integer_object_nulls : bool, default False + Cast integers with nulls to objects + date_as_object : bool, default True + Cast dates to objects. If False, convert to datetime64 dtype with + the equivalent time unit (if supported). Note: in pandas version + < 2.0, only datetime64[ns] conversion is supported. + timestamp_as_object : bool, default False + Cast non-nanosecond timestamps (np.datetime64) to objects. This is + useful in pandas version 1.x if you have timestamps that don't fit + in the normal date range of nanosecond timestamps (1678 CE-2262 CE). + Non-nanosecond timestamps are supported in pandas version 2.0. + If False, all timestamps are converted to datetime64 dtype. + use_threads : bool, default True + Whether to parallelize the conversion using multiple threads. + deduplicate_objects : bool, default True + Do not create multiple copies Python objects when created, to save + on memory use. Conversion will be slower. + ignore_metadata : bool, default False + If True, do not use the 'pandas' metadata to reconstruct the + DataFrame index, if present + safe : bool, default True + For certain data types, a cast is needed in order to store the + data in a pandas DataFrame or Series (e.g. timestamps are always + stored as nanoseconds in pandas). This option controls whether it + is a safe cast or not. 
+ split_blocks : bool, default False + If True, generate one internal "block" for each column when + creating a pandas.DataFrame from a RecordBatch or Table. While this + can temporarily reduce memory note that various pandas operations + can trigger "consolidation" which may balloon memory use. + self_destruct : bool, default False + EXPERIMENTAL: If True, attempt to deallocate the originating Arrow + memory while converting the Arrow object to pandas. If you use the + object after calling to_pandas with this option it will crash your + program. + + Note that you may not see always memory usage improvements. For + example, if multiple columns share an underlying allocation, + memory can't be freed until all columns are converted. + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + This can change the ordering of (key, value) pairs, and will + deduplicate multiple keys, resulting in a possible loss of data. + + If 'lossy', this key deduplication results in a warning printed + when detected. If 'strict', this instead results in an exception + being raised when detected. + types_mapper : function, default None + A function mapping a pyarrow DataType to a pandas ExtensionDtype. + This can be used to override the default pandas type for conversion + of built-in pyarrow types or in absence of pandas_metadata in the + Table schema. The function receives a pyarrow DataType and is + expected to return a pandas ExtensionDtype or ``None`` if the + default conversion should be used for that type. If you have + a dictionary mapping, you can pass ``dict.get`` as function. + coerce_temporal_nanoseconds : bool, default False + Only applicable to pandas version >= 2.0. + A legacy option to coerce date32, date64, duration, and timestamp + time units to nanoseconds when converting to pandas. This is the + default behavior in pandas version 1.x. Set this option to True if + you'd like to use this coercion when using pandas version >= 2.0 + for backwards compatibility (not recommended otherwise). + + Returns + ------- + pandas.Series or pandas.DataFrame depending on type of object + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + + Convert a Table to pandas DataFrame: + + >>> table = pa.table( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... 
) + >>> table.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + >>> isinstance(table.to_pandas(), pd.DataFrame) + True + + Convert a RecordBatch to pandas DataFrame: + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + >>> batch + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + >>> isinstance(batch.to_pandas(), pd.DataFrame) + True + + Convert a Chunked Array to pandas Series: + + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_pandas() + 0 2 + 1 2 + 2 4 + 3 4 + 4 5 + 5 100 + dtype: int64 + >>> isinstance(n_legs.to_pandas(), pd.Series) + True + """ + +_CastAs = TypeVar("_CastAs", bound=DataType) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) +_ScalarT = TypeVar("_ScalarT", bound=Scalar) + +class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + """ + The base class for all Arrow arrays. + """ + + def diff(self, other: Self) -> str: + """ + Compare contents of this array against another one. + + Return a string containing the result of diffing this array + (on the left side) against the other array (on the right side). + + Parameters + ---------- + other : Array + The other array to compare this array with. + + Returns + ------- + diff : str + A human-readable printout of the differences. + + Examples + -------- + >>> import pyarrow as pa + >>> left = pa.array(["one", "two", "three"]) + >>> right = pa.array(["two", None, "two-and-a-half", "three"]) + >>> print(left.diff(right)) # doctest: +SKIP + + @@ -0, +0 @@ + -"one" + @@ -2, +1 @@ + +null + +"two-and-a-half" + """ + def cast( + self, + target_type: _CastAs, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_CastAs]]: + """ + Cast array values to another data type + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, default None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Returns + ------- + cast : Array + """ + def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: + """ + Return zero-copy "view" of array as another data type. + + The data types must have compatible columnar buffer layouts + + Parameters + ---------- + target_type : DataType + Type to construct view as. + + Returns + ------- + view : Array + """ + def sum(self, **kwargs) -> _Scalar_co: + """ + Sum the values in a numerical array. + + See :func:`pyarrow.compute.sum` for full usage. + + Parameters + ---------- + **kwargs : dict, optional + Options to pass to :func:`pyarrow.compute.sum`. + + Returns + ------- + sum : Scalar + A scalar containing the sum value. + """ + @property + def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ... + def unique(self) -> Self: + """ + Compute distinct elements in array. 
+
+        Returns
+        -------
+        unique : Array
+            An array of the same data type, with deduplicated elements.
+        """
+    def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray:
+        """
+        Compute dictionary-encoded representation of array.
+
+        See :func:`pyarrow.compute.dictionary_encode` for full usage.
+
+        Parameters
+        ----------
+        null_encoding : str, default "mask"
+            How to handle null entries.
+
+        Returns
+        -------
+        encoded : DictionaryArray
+            A dictionary-encoded version of this array.
+        """
+    def value_counts(self) -> StructArray:
+        """
+        Compute counts of unique elements in array.
+
+        Returns
+        -------
+        StructArray
+            An array of structs
+        """
+    @overload
+    @staticmethod
+    def from_pandas(
+        obj: pd.Series | np.ndarray | ArrayLike,
+        *,
+        mask: Mask | None = None,
+        type: _DataTypeT,
+        safe: bool = True,
+        memory_pool: MemoryPool | None = None,
+    ) -> Array[Scalar[_DataTypeT]]: ...
+    @overload
+    @staticmethod
+    def from_pandas(
+        obj: pd.Series | np.ndarray | ArrayLike,
+        *,
+        mask: Mask | None = None,
+        safe: bool = True,
+        memory_pool: MemoryPool | None = None,
+    ) -> Array[Scalar]: ...
+    @staticmethod
+    def from_pandas(*args, **kwargs):
+        """
+        Convert pandas.Series to an Arrow Array.
+
+        This method uses Pandas semantics about what values indicate
+        nulls. See pyarrow.array for more general conversion from arrays or
+        sequences to Arrow arrays.
+
+        Parameters
+        ----------
+        obj : ndarray, pandas.Series, array-like
+        mask : array (boolean), optional
+            Indicate which values are null (True) or not null (False).
+        type : pyarrow.DataType
+            Explicit type to attempt to coerce to, otherwise will be inferred
+            from the data.
+        safe : bool, default True
+            Check for overflows or other unsafe conversions.
+        memory_pool : pyarrow.MemoryPool, optional
+            If not passed, will allocate memory from the currently-set default
+            memory pool.
+
+        Notes
+        -----
+        Localized timestamps will currently be returned as UTC (pandas's native
+        representation). Timezone-naive data will be implicitly interpreted as
+        UTC.
+
+        Returns
+        -------
+        array : pyarrow.Array or pyarrow.ChunkedArray
+            ChunkedArray is returned if object data overflows binary buffer.
+        """
+    @staticmethod
+    def from_buffers(
+        type: _DataTypeT,
+        length: int,
+        buffers: list[Buffer],
+        null_count: int = -1,
+        offset: int = 0,
+        children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None,
+    ) -> Array[Scalar[_DataTypeT]]:
+        """
+        Construct an Array from a sequence of buffers.
+
+        The concrete type returned depends on the datatype.
+
+        Parameters
+        ----------
+        type : DataType
+            The value type of the array.
+        length : int
+            The number of values in the array.
+        buffers : List[Buffer]
+            The buffers backing this array.
+        null_count : int, default -1
+            The number of null entries in the array. Negative value means that
+            the null count is not known.
+        offset : int, default 0
+            The array's logical offset (in values, not in bytes) from the
+            start of each buffer.
+        children : List[Array], default None
+            Nested type children with length matching type.num_fields.
+
+        Returns
+        -------
+        array : Array
+        """
+    @property
+    def null_count(self) -> int: ...
+    @property
+    def nbytes(self) -> int:
+        """
+        Total number of bytes consumed by the elements of the array.
+
+        In other words, the sum of bytes from all buffer
+        ranges referenced.
+
+        Unlike `get_total_buffer_size` this method will account for array
+        offsets.
+
+        If buffers are shared between arrays then the shared
+        portion will be counted multiple times.
+ + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the array. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + """ + def __sizeof__(self) -> int: ... + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_string( + self, + *, + indent: int = 2, + top_level_indent: int = 0, + window: int = 10, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: + """ + Render a "pretty-printed" string representation of the Array. + + Note: for data on a non-CPU device, the full array is copied to CPU + memory. + + Parameters + ---------- + indent : int, default 2 + How much to indent the internal items in the string to + the right, by default ``2``. + top_level_indent : int, default 0 + How much to indent right the entire content of the array, + by default ``0``. + window : int + How many primitive items to preview at the begin and end + of the array when the array is bigger than the window. + The other items will be ellipsed. + container_window : int + How many container items (such as a list in a list array) + to preview at the begin and end of the array when the array + is bigger than the window. + skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. + """ + format = to_string + def equals(self, other: Self) -> bool: ... + def __len__(self) -> int: ... + def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: + """ + Return BooleanArray indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array + """ + def is_nan(self) -> BooleanArray: + """ + Return BooleanArray indicating the NaN values. + + Returns + ------- + array : boolean Array + """ + def is_valid(self) -> BooleanArray: + """ + Return BooleanArray indicating the non-null values. + """ + def fill_null( + self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType + ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: + """ + See :func:`pyarrow.compute.fill_null` for usage. + + Parameters + ---------- + fill_value : any + The replacement value for null entries. + + Returns + ------- + result : Array + A new array with nulls replaced by the given value. + """ + @overload + def __getitem__(self, key: int) -> _Scalar_co: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key): + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or Array (slice) + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this array. + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice. + length : int, default None + Length of slice (default is until end of Array starting from + offset). + + Returns + ------- + sliced : Array + An array with the same datatype, containing the sliced values. 
+ """ + def take(self, indices: Indices) -> Self: + """ + Select values from an array. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the array whose values will be returned. + + Returns + ------- + taken : Array + An array with the same datatype, containing the taken values. + """ + def drop_null(self) -> Self: + """ + Remove missing values from an array. + """ + def filter( + self, + mask: Mask, + *, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + ) -> Self: + """ + Select values from an array. + + See :func:`pyarrow.compute.filter` for full usage. + + Parameters + ---------- + mask : Array or array-like + The boolean mask to filter the array with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled. + + Returns + ------- + filtered : Array + An array of the same type, with only the elements selected by + the boolean mask. + """ + @overload + def index( + self: Array[_ScalarT], + value: _ScalarT, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> scalar.Int64Scalar: ... + @overload + def index( + self: Array[Scalar[_BasicDataType[_AsPyType]]], + value: _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> scalar.Int64Scalar: ... + def index(self, *args, **kwargs): + """ + Find the first index of a value. + + See :func:`pyarrow.compute.index` for full usage. + + Parameters + ---------- + value : Scalar or object + The value to look for in the array. + start : int, optional + The start index where to look for `value`. + end : int, optional + The end index where to look for `value`. + memory_pool : MemoryPool, optional + A memory pool for potential memory allocations. + + Returns + ------- + index : Int64Scalar + The index of the value in the array (-1 if not found). + """ + def sort(self, order: Order = "ascending", **kwargs) -> Self: + """ + Sort the Array + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : Array + """ + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> np.ndarray: + """ + Return a NumPy view or copy of this array. + + By default, tries to return a view of this array. This is only + supported for primitive arrays with the same memory layout as NumPy + (i.e. integers, floating point, ..) and without any nulls. + + For the extension arrays, this method simply delegates to the + underlying storage array. + + Parameters + ---------- + zero_copy_only : bool, default True + If True, an exception will be raised if the conversion to a numpy + array would require copying the underlying data (e.g. in presence + of nulls, or for non-primitive types). + writable : bool, default False + For numpy arrays created with zero copy (view on the Arrow data), + the resulting array is not writable (Arrow data is immutable). + By setting this to True, a copy of the array is made to ensure + it is writable. 
+
+        Returns
+        -------
+        array : numpy.ndarray
+        """
+    def to_pylist(
+        self: Array[Scalar[_BasicDataType[_AsPyType]]],
+        *,
+        maps_as_pydicts: Literal["lossy", "strict"] | None = None,
+    ) -> list[_AsPyType | None]:
+        """
+        Convert to a list of native Python objects.
+
+        Parameters
+        ----------
+        maps_as_pydicts : str, optional, default `None`
+            Valid values are `None`, 'lossy', or 'strict'.
+            The default behavior (`None`), is to convert Arrow Map arrays to
+            Python association lists (list-of-tuples) in the same order as the
+            Arrow Map, as in [(key1, value1), (key2, value2), ...].
+
+            If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.
+
+            If 'lossy', whenever duplicate keys are detected, a warning will be printed.
+            The last seen value of a duplicate key will be in the Python dictionary.
+            If 'strict', this instead results in an exception being raised when detected.
+
+        Returns
+        -------
+        lst : list
+        """
+    tolist = to_pylist
+    def validate(self, *, full: bool = False) -> None:
+        """
+        Perform validation checks. An exception is raised if validation fails.
+
+        By default only cheap validation checks are run. Pass `full=True`
+        for thorough validation checks (potentially O(n)).
+
+        Parameters
+        ----------
+        full : bool, default False
+            If True, run expensive checks, otherwise cheap checks only.
+
+        Raises
+        ------
+        ArrowInvalid
+        """
+    @property
+    def offset(self) -> int:
+        """
+        A relative position into another array's data.
+
+        The purpose is to enable zero-copy slicing. This value defaults to zero
+        but must be applied on all operations with the physical storage
+        buffers.
+        """
+    def buffers(self) -> list[Buffer | None]:
+        """
+        Return a list of Buffer objects pointing to this array's physical
+        storage.
+
+        To correctly interpret these buffers, you need to also apply the offset
+        multiplied with the size of the stored data type.
+        """
+    def copy_to(self, destination: MemoryManager | Device) -> Self:
+        """
+        Construct a copy of the array with all buffers on destination
+        device.
+
+        This method recursively copies the array's buffers and those of its
+        children onto the destination MemoryManager device and returns the
+        new Array.
+
+        Parameters
+        ----------
+        destination : pyarrow.MemoryManager or pyarrow.Device
+            The destination device to copy the array to.
+
+        Returns
+        -------
+        Array
+        """
+    def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None:
+        """
+        Export to a C ArrowArray struct, given its pointer.
+
+        If a C ArrowSchema struct pointer is also given, the array type
+        is exported to it at the same time.
+
+        Parameters
+        ----------
+        out_ptr: int
+            The raw pointer to a C ArrowArray struct.
+        out_schema_ptr: int (optional)
+            The raw pointer to a C ArrowSchema struct.
+
+        Be careful: if you don't pass the ArrowArray struct to a consumer,
+        array memory will leak. This is a low-level function intended for
+        expert users.
+        """
+    @classmethod
+    def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self:
+        """
+        Import Array from a C ArrowArray struct, given its pointer
+        and the imported array type.
+
+        Parameters
+        ----------
+        in_ptr: int
+            The raw pointer to a C ArrowArray struct.
+        type: DataType or int
+            Either a DataType object, or the raw pointer to a C ArrowSchema
+            struct.
+
+        This is a low-level function intended for expert users.
+        """
+    def __arrow_c_array__(self, requested_schema=None) -> Any:
+        """
+        Get a pair of PyCapsules containing a C ArrowArray representation of the object.
+ + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the array to this data type. + If None, the array will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. + """ + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowDeviceArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the array type + is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: + """ + Import Array from a C ArrowDeviceArray struct, given its pointer + and the imported array type. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + type: DataType or int + Either a DataType object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: + """ + Get a pair of PyCapsules containing a C ArrowDeviceArray representation + of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the array to this data type. + If None, the array will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + kwargs + Currently no additional keyword arguments are supported, but + this method will accept any keyword with a value of ``None`` + for compatibility with future keywords. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, + respectively. + """ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def __dlpack__(self, stream: int | None = None) -> Any: + """Export a primitive array as a DLPack capsule. + + Parameters + ---------- + stream : int, optional + A Python integer representing a pointer to a stream. Currently not supported. + Stream is provided by the consumer to the producer to instruct the producer + to ensure that operations can safely be performed on the array. + + Returns + ------- + capsule : PyCapsule + A DLPack capsule for the array, pointing to a DLManagedTensor. + """ + def __dlpack_device__(self) -> tuple[int, int]: + """ + Return the DLPack device tuple this arrays resides on. + + Returns + ------- + tuple : Tuple[int, int] + Tuple with index specifying the type of the device (where + CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the + device which is 0 by default for CPU. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the array resides. 
+ + Returns + ------- + DeviceAllocationType + """ + + @property + def is_cpu(self) -> bool: + """ + Whether the array is CPU-accessible. + """ + @property + def statistics(self) -> ArrayStatistics | None: + """ + Statistics of the array. + """ + +class NullArray(Array[scalar.NullScalar]): ... + +class BooleanArray(Array[scalar.BooleanScalar]): + @property + def false_count(self) -> int: ... + @property + def true_count(self) -> int: ... + +class NumericArray(Array[_ScalarT]): ... +class IntegerArray(NumericArray[_ScalarT]): ... +class FloatingPointArray(NumericArray[_ScalarT]): ... +class Int8Array(IntegerArray[scalar.Int8Scalar]): ... +class UInt8Array(IntegerArray[scalar.UInt8Scalar]): ... +class Int16Array(IntegerArray[scalar.Int16Scalar]): ... +class UInt16Array(IntegerArray[scalar.UInt16Scalar]): ... +class Int32Array(IntegerArray[scalar.Int32Scalar]): ... +class UInt32Array(IntegerArray[scalar.UInt32Scalar]): ... +class Int64Array(IntegerArray[scalar.Int64Scalar]): ... +class UInt64Array(IntegerArray[scalar.UInt64Scalar]): ... +class Date32Array(NumericArray[scalar.Date32Scalar]): ... +class Date64Array(NumericArray[scalar.Date64Scalar]): ... +class TimestampArray(NumericArray[scalar.TimestampScalar[types._Unit, types._Tz]]): ... +class Time32Array(NumericArray[scalar.Time32Scalar[types._Time32Unit]]): ... +class Time64Array(NumericArray[scalar.Time64Scalar[types._Time64Unit]]): ... +class DurationArray(NumericArray[scalar.DurationScalar[types._Unit]]): ... +class MonthDayNanoIntervalArray(Array[scalar.MonthDayNanoIntervalScalar]): ... +class HalfFloatArray(FloatingPointArray[scalar.HalfFloatScalar]): ... +class FloatArray(FloatingPointArray[scalar.FloatScalar]): ... +class DoubleArray(FloatingPointArray[scalar.DoubleScalar]): ... +class FixedSizeBinaryArray(Array[scalar.FixedSizeBinaryScalar]): ... +class Decimal32Array(FixedSizeBinaryArray): ... +class Decimal64Array(FixedSizeBinaryArray): ... +class Decimal128Array(FixedSizeBinaryArray): ... +class Decimal256Array(FixedSizeBinaryArray): ... + +class BaseListArray(Array[_ScalarT]): + def flatten(self, recursive: bool = False) -> Array: ... + def value_parent_indices(self) -> Int64Array: ... + def value_lengths(self) -> Int32Array: ... + +class ListArray(BaseListArray[_ScalarT]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[int], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.Int64Type]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[float], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.Float64Type]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[str], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.StringType]]: ... 
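# A minimal usage sketch for the `ListArray.from_arrays` overloads above: when
# `values` is a plain Python list, its element type picks the ListArray
# parametrization (int -> Int64Type, float -> Float64Type, str -> StringType).
import pyarrow as pa

ints = pa.ListArray.from_arrays([0, 2, 4], [1, 2, 3, 4])      # ListArray[ListScalar[Int64Type]]
words = pa.ListArray.from_arrays([0, 1, 3], ["a", "b", "c"])  # ListArray[ListScalar[StringType]]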
+ @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[bytes], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.BinaryType]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list, + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: Array | list, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct ListArray from arrays of int32 offsets and values. + + Parameters + ---------- + offsets : Array (int32 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_array : ListArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + >>> # nulls in the offsets array become null lists + >>> offsets = pa.array([0, None, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the ListArray + ignoring the array's offset. + + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. + + Returns + ------- + values : Array + + See Also + -------- + ListArray.flatten : ... + + Examples + -------- + + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, 4, None, 6]]) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + """ + @property + def offsets(self) -> Int32Array: + """ + Return the list offsets as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + offsets : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, 4, 5]]) + >>> array.offsets + + [ + 0, + 2, + 2, + 5 + ] + """ + +class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListArray[_DataTypeT]: ... 
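# A minimal usage sketch for `LargeListArray.from_arrays`, assuming `pa.array` is
# annotated to return Int64Array and StringArray for these inputs so the typed
# overload above binds the element type.
import pyarrow as pa

offsets = pa.array([0, 2, 4], type=pa.int64())
values = pa.array(["a", "b", "c", "d"])
large = pa.LargeListArray.from_arrays(offsets, values)  # LargeListArray[StringType]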
+ @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListArray[_DataTypeT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct LargeListArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_array : LargeListArray + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. + + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. + + Returns + ------- + values : Array + + See Also + -------- + LargeListArray.flatten : ... + + Examples + -------- + + The values include null elements from the sub-lists: + + >>> import pyarrow as pa + >>> array = pa.array( + ... [[1, 2], None, [3, 4, None, 6]], + ... type=pa.large_list(pa.int32()), + ... ) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + """ + @property + def offsets(self) -> Int64Array: + """ + Return the list offsets as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + offsets : Int64Array + """ + +class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListViewArray[_DataTypeT]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListViewArray[_DataTypeT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct ListViewArray from arrays of int32 offsets, sizes, and values. + + Parameters + ---------- + offsets : Array (int32 type) + sizes : Array (int32 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
+ + Returns + ------- + list_view_array : ListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the ListViewArray + ignoring the array's offset and sizes. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + Examples + -------- + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + @property + def offsets(self) -> Int32Array: + """ + Return the list offsets as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + offsets : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + @property + def sizes(self) -> Int32Array: + """ + Return the list sizes as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + sizes : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + +class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListViewArray[_DataTypeT]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListViewArray[_DataTypeT]: ... 
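# A minimal runtime sketch of list-view construction, mirroring the `from_arrays`
# docstrings in this file: view arrays take separate offsets and sizes, so entries
# may overlap and reuse the same values buffer.
import pyarrow as pa

values = pa.array([1, 2, 3, 4])
offsets = pa.array([0, 1, 2])
sizes = pa.array([2, 2, 2])
views = pa.ListViewArray.from_arrays(offsets, sizes, values)  # [[1, 2], [2, 3], [3, 4]]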
+ @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct LargeListViewArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + sizes : Array (int64 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_view_array : LargeListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + See Also + -------- + LargeListArray.flatten : ... + + Examples + -------- + + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + @property + def offsets(self) -> Int64Array: + """ + Return the list view offsets as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + offsets : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + @property + def sizes(self) -> Int64Array: + """ + Return the list view sizes as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. 
+ + Returns + ------- + sizes : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + +class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _Size]]): + @overload + @classmethod + def from_arrays( + cls, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + mask: Mask | None = None, + ) -> FixedSizeListArray[_DataTypeT, None]: ... + @overload + @classmethod + def from_arrays( + cls, + values: Array[Scalar[_DataTypeT]], + limit_size: _Size, + *, + type: None = None, + mask: Mask | None = None, + ) -> FixedSizeListArray[_DataTypeT, _Size]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct FixedSizeListArray from array of values and a list length. + + Parameters + ---------- + values : Array (any type) + list_size : int + The fixed length of the lists. + type : DataType, optional + If not specified, a default ListType with the values' type and + `list_size` length is used. + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + + Returns + ------- + FixedSizeListArray + + Examples + -------- + + Create from a values array and a list size: + + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> arr = pa.FixedSizeListArray.from_arrays(values, 2) + >>> arr + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + + Or create from a values array, list size and matching type: + + >>> typ = pa.list_(pa.field("values", pa.int64()), 2) + >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) + >>> arr + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> BaseListArray[scalar.ListScalar[_DataTypeT]]: + """ + Return the underlying array of values which backs the + FixedSizeListArray. + + Note even null elements are included. + + Compare with :meth:`flatten`, which returns only the non-null + sub-list values. + + Returns + ------- + values : Array + + See Also + -------- + FixedSizeListArray.flatten : ... + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, None]], type=pa.list_(pa.int32(), 2)) + >>> array.values + + [ + 1, + 2, + null, + null, + 3, + null + ] + + """ + +_MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) +_MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) + +class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + keys: Array[Scalar[_MapKeyT]], + items: Array[Scalar[_MapItemT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> MapArray[_MapKeyT, _MapItemT]: ... + @overload + @classmethod + def from_arrays( # pyright: ignore[reportIncompatibleMethodOverride] + cls, + offsets: Int64Array, + values: Array, + *, + type: MapType[_MapKeyT, _MapItemT], + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> MapArray[_MapKeyT, _MapItemT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride] + """ + Construct MapArray from arrays of int32 offsets and key, item arrays. 
+ + Parameters + ---------- + offsets : array-like or sequence (int32 type) + keys : array-like or sequence (any type) + items : array-like or sequence (any type) + type : DataType, optional + If not specified, a default MapArray with the keys' and items' type is used. + pool : MemoryPool + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + map_array : MapArray + + Examples + -------- + First, let's understand the structure of our dataset when viewed in a rectangular data model. + The total of 5 respondents answered the question "How much did you like the movie x?". + The value -1 in the integer array means that the value is missing. The boolean array + represents the null bitmask corresponding to the missing values in the integer array. + + >>> import pyarrow as pa + >>> movies_rectangular = np.ma.masked_array( + ... [[10, -1, -1], [8, 4, 5], [-1, 10, 3], [-1, -1, -1], [-1, -1, -1]], + ... [ + ... [False, True, True], + ... [False, False, False], + ... [True, False, False], + ... [True, True, True], + ... [True, True, True], + ... ], + ... ) + + To represent the same data with the MapArray and from_arrays, the data is + formed like this: + + >>> offsets = [ + ... 0, # -- row 1 start + ... 1, # -- row 2 start + ... 4, # -- row 3 start + ... 6, # -- row 4 start + ... 6, # -- row 5 start + ... 6, # -- row 5 end + ... ] + >>> movies = [ + ... "Dark Knight", # ---------------------------------- row 1 + ... "Dark Knight", + ... "Meet the Parents", + ... "Superman", # -- row 2 + ... "Meet the Parents", + ... "Superman", # ----------------- row 3 + ... ] + >>> likings = [ + ... 10, # -------- row 1 + ... 8, + ... 4, + ... 5, # --- row 2 + ... 10, + ... 3, # ------ row 3 + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... + 2 [(Meet the Parents, 10), (Superman, 3)] + 3 [] + 4 [] + dtype: object + + If the data in the empty rows needs to be marked as missing, it's possible + to do so by modifying the offsets argument, so that we specify `None` as + the starting positions of the rows we want marked as missing. The end row + offset still has to refer to the existing value from keys (and values): + + >>> offsets = [ + ... 0, # ----- row 1 start + ... 1, # ----- row 2 start + ... 4, # ----- row 3 start + ... None, # -- row 4 start + ... None, # -- row 5 start + ... 6, # ----- row 5 end + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... + 2 [(Meet the Parents, 10), (Superman, 3)] + 3 None + 4 None + dtype: object + """ + @property + def keys(self) -> Array: + """Flattened array of keys across all maps in array""" + @property + def items(self) -> Array: + """Flattened array of items across all maps in array""" + +class UnionArray(Array[scalar.UnionScalar]): + @deprecated("Use fields() instead") + def child(self, pos: int) -> Field: + """ + DEPRECATED, use field() instead. + + Parameters + ---------- + pos : int + The physical index of the union child field (not its type code). + + Returns + ------- + field : pyarrow.Field + The given child field. + """ + def field(self, pos: int) -> Array: + """ + Return the given child field as an individual array. + + For sparse unions, the returned array has its offset, length, + and null count adjusted. + + For dense unions, the returned array is unchanged. 
+
+        Parameters
+        ----------
+        pos : int
+            The physical index of the union child field (not its type code).
+
+        Returns
+        -------
+        field : Array
+            The given child field.
+        """
+    @property
+    def type_codes(self) -> Int8Array:
+        """Get the type codes array."""
+    @property
+    def offsets(self) -> Int32Array:
+        """
+        Get the value offsets array (dense arrays only).
+
+        Does not account for any slice offset.
+        """
+    @staticmethod
+    def from_dense(
+        types: Int8Array,
+        value_offsets: Int32Array,
+        children: NullableCollection[Array],
+        field_names: list[str] | None = None,
+        type_codes: Int8Array | None = None,
+    ) -> UnionArray:
+        """
+        Construct dense UnionArray from arrays of int8 types, int32 offsets and
+        children arrays
+
+        Parameters
+        ----------
+        types : Array (int8 type)
+        value_offsets : Array (int32 type)
+        children : list
+        field_names : list
+        type_codes : list
+
+        Returns
+        -------
+        union_array : UnionArray
+        """
+    @staticmethod
+    def from_sparse(
+        types: Int8Array,
+        children: NullableCollection[Array],
+        field_names: list[str] | None = None,
+        type_codes: Int8Array | None = None,
+    ) -> UnionArray:
+        """
+        Construct sparse UnionArray from arrays of int8 types and children
+        arrays
+
+        Parameters
+        ----------
+        types : Array (int8 type)
+        children : list
+        field_names : list
+        type_codes : list
+
+        Returns
+        -------
+        union_array : UnionArray
+        """
+
+class StringArray(Array[scalar.StringScalar]):
+    @staticmethod
+    def from_buffers(  # type: ignore[override]
+        length: int,
+        value_offsets: Buffer,
+        data: Buffer,
+        null_bitmap: Buffer | None = None,
+        null_count: int | None = -1,
+        offset: int | None = 0,
+    ) -> StringArray:
+        """
+        Construct a StringArray from value_offsets and data buffers.
+        If there are nulls in the data, also a null_bitmap and the matching
+        null_count must be passed.
+
+        Parameters
+        ----------
+        length : int
+        value_offsets : Buffer
+        data : Buffer
+        null_bitmap : Buffer, optional
+        null_count : int, default 0
+        offset : int, default 0
+
+        Returns
+        -------
+        string_array : StringArray
+        """
+
+class LargeStringArray(Array[scalar.LargeStringScalar]):
+    @staticmethod
+    def from_buffers(  # type: ignore[override]
+        length: int,
+        value_offsets: Buffer,
+        data: Buffer,
+        null_bitmap: Buffer | None = None,
+        null_count: int | None = -1,
+        offset: int | None = 0,
+    ) -> LargeStringArray:
+        """
+        Construct a LargeStringArray from value_offsets and data buffers.
+        If there are nulls in the data, also a null_bitmap and the matching
+        null_count must be passed.
+
+        Parameters
+        ----------
+        length : int
+        value_offsets : Buffer
+        data : Buffer
+        null_bitmap : Buffer, optional
+        null_count : int, default 0
+        offset : int, default 0
+
+        Returns
+        -------
+        large_string_array : LargeStringArray
+        """
+
+class StringViewArray(Array[scalar.StringViewScalar]): ...
+
+class BinaryArray(Array[scalar.BinaryScalar]):
+    @property
+    def total_values_length(self) -> int:
+        """
+        The number of bytes from beginning to end of the data buffer addressed
+        by the offsets of this BinaryArray.
+        """
+
+class LargeBinaryArray(Array[scalar.LargeBinaryScalar]):
+    @property
+    def total_values_length(self) -> int:
+        """
+        The number of bytes from beginning to end of the data buffer addressed
+        by the offsets of this LargeBinaryArray.
+        """
+
+class BinaryViewArray(Array[scalar.BinaryViewScalar]): ...
+
+class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]):
+    def dictionary_encode(self) -> Self: ...
# type: ignore[override] + def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: + """ + Decodes the DictionaryArray to an Array. + """ + @property + def indices(self) -> Array[Scalar[_IndexT]]: ... + @property + def dictionary(self) -> Array[Scalar[_BasicValueT]]: ... + @staticmethod + def from_buffers( # type: ignore[override] + type: _BasicValueT, + length: int, + buffers: list[Buffer], + dictionary: Array | np.ndarray | pd.Series, + null_count: int = -1, + offset: int = 0, + ) -> DictionaryArray[Any, _BasicValueT]: + """ + Construct a DictionaryArray from buffers. + + Parameters + ---------- + type : pyarrow.DataType + length : int + The number of values in the array. + buffers : List[Buffer] + The buffers backing the indices array. + dictionary : pyarrow.Array, ndarray or pandas.Series + The array of values referenced by the indices. + null_count : int, default -1 + The number of null entries in the indices array. Negative value means that + the null count is not known. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + + Returns + ------- + dict_array : DictionaryArray + """ + @staticmethod + def from_arrays( + indices: Indices, + dictionary: Array | np.ndarray | pd.Series, + mask: np.ndarray | pd.Series | BooleanArray | None = None, + ordered: bool = False, + from_pandas: bool = False, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> DictionaryArray: + """ + Construct a DictionaryArray from indices and values. + + Parameters + ---------- + indices : pyarrow.Array, numpy.ndarray or pandas.Series, int type + Non-negative integers referencing the dictionary values by zero + based index. + dictionary : pyarrow.Array, ndarray or pandas.Series + The array of values referenced by the indices. + mask : ndarray or pandas.Series, bool type + True values indicate that indices are actually null. + ordered : bool, default False + Set to True if the category values are ordered. + from_pandas : bool, default False + If True, the indices should be treated as though they originated in + a pandas.Categorical (null encoded as -1). + safe : bool, default True + If True, check that the dictionary indices are in range. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise uses default pool. + + Returns + ------- + dict_array : DictionaryArray + """ + +class StructArray(Array[scalar.StructScalar]): + def field(self, index: int | str) -> Array: + """ + Retrieves the child array belonging to field. + + Parameters + ---------- + index : Union[int, str] + Index / position or name of the field. + + Returns + ------- + result : Array + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: + """ + Return one individual array for each field in the struct. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + result : List[Array] + """ + @staticmethod + def from_arrays( + arrays: Iterable[Array], + names: list[str] | None = None, + fields: list[Field] | None = None, + mask=None, + memory_pool: MemoryPool | None = None, + type: types.StructType | None = None, + ) -> StructArray: + """ + Construct StructArray from collection of arrays representing + each field in the struct. + + Either field names, field instances or a struct type must be passed. 
+ + Parameters + ---------- + arrays : sequence of Array + names : List[str] (optional) + Field names for each struct child. + fields : List[Field] (optional) + Field instances for each struct child. + mask : pyarrow.Array[bool] (optional) + Indicate which values are null (True) or not null (False). + memory_pool : MemoryPool (optional) + For memory allocations, if required, otherwise uses default pool. + type : pyarrow.StructType (optional) + Struct type for name and type of each child. + + Returns + ------- + result : StructArray + """ + def sort(self, order: Order = "ascending", by: str | None = None, **kwargs) -> StructArray: + """ + Sort the StructArray + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + by : str or None, default None + If to sort the array by one of its fields + or by the whole array. + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : StructArray + """ + +class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT]]): + @overload + @staticmethod + def from_arrays( + run_ends: Int16Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int16Type, _BasicValueT]: ... + @overload + @staticmethod + def from_arrays( + run_ends: Int32Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int32Type, _BasicValueT]: ... + @overload + @staticmethod + def from_arrays( + run_ends: Int64Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int64Type, _BasicValueT]: ... + @staticmethod + def from_arrays(*args, **kwargs): + """ + Construct RunEndEncodedArray from run_ends and values arrays. + + Parameters + ---------- + run_ends : Array (int16, int32, or int64 type) + The run_ends array. + values : Array (any type) + The values array. + type : pyarrow.DataType, optional + The run_end_encoded(run_end_type, value_type) array type. + + Returns + ------- + RunEndEncodedArray + """ + @staticmethod + def from_buffers( # pyright: ignore[reportIncompatibleMethodOverride] + type: DataType, + length: int, + buffers: list[Buffer], + null_count: int = -1, + offset=0, + children: tuple[Array, Array] | None = None, + ) -> RunEndEncodedArray[Any, _BasicValueT]: + """ + Construct a RunEndEncodedArray from all the parameters that make up an + Array. + + RunEndEncodedArrays do not have buffers, only children arrays, but this + implementation is needed to satisfy the Array interface. + + Parameters + ---------- + type : DataType + The run_end_encoded(run_end_type, value_type) type. + length : int + The logical length of the run-end encoded array. Expected to match + the last value of the run_ends array (children[0]) minus the offset. + buffers : List[Buffer] + Empty List or [None]. + null_count : int, default -1 + The number of null entries in the array. Run-end encoded arrays + are specified to not have valid bits and null_count always equals 0. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + children : List[Array] + Nested type children containing the run_ends and values arrays. + + Returns + ------- + RunEndEncodedArray + """ + @property + def run_ends(self) -> Array[scalar.Scalar[_RunEndType]]: + """ + An array holding the logical indexes of each run-end. + + The physical offset to the array is applied. 
+ """ + @property + def values(self) -> Array[scalar.Scalar[_BasicValueT]]: + """ + An array holding the values of each run. + + The physical offset to the array is applied. + """ + def find_physical_offset(self) -> int: + """ + Find the physical offset of this REE array. + + This is the offset of the run that contains the value of the first + logical element of this array considering its offset. + + This function uses binary-search, so it has a O(log N) cost. + """ + def find_physical_length(self) -> int: + """ + Find the physical length of this REE array. + + The physical length of an REE is the number of physical values (and + run-ends) necessary to represent the logical range of values from offset + to length. + + This function uses binary-search, so it has a O(log N) cost. + """ + +_ArrayT = TypeVar("_ArrayT", bound=Array) + +class ExtensionArray(Array[scalar.ExtensionScalar], Generic[_ArrayT]): + @property + def storage(self) -> Any: ... + @staticmethod + def from_storage(typ: types.BaseExtensionType, storage: _ArrayT) -> ExtensionArray[_ArrayT]: + """ + Construct ExtensionArray from type and storage array. + + Parameters + ---------- + typ : DataType + The extension type for the result array. + storage : Array + The underlying storage for the result array. + + Returns + ------- + ext_array : ExtensionArray + """ + +class JsonArray(ExtensionArray[_ArrayT]): + """ + Concrete class for Arrow arrays of JSON data type. + + This does not guarantee that the JSON data actually + is valid JSON. + + Examples + -------- + Define the extension type for JSON array + + >>> import pyarrow as pa + >>> json_type = pa.json_(pa.large_utf8()) + + Create an extension array + + >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] + >>> storage = pa.array(arr, pa.large_utf8()) + >>> pa.ExtensionArray.from_storage(json_type, storage) + + [ + null, + "{ "id":30, "values":["a", "b"] }" + ] + """ + +class UuidArray(ExtensionArray[_ArrayT]): ... + +class FixedShapeTensorArray(ExtensionArray[_ArrayT]): + """ + Concrete class for fixed shape tensor extension arrays. + + Examples + -------- + Define the extension type for tensor array + + >>> import pyarrow as pa + >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) + + Create an extension array + + >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] + >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) + >>> pa.ExtensionArray.from_storage(tensor_type, storage) + + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 10, + 20, + 30, + 40 + ], + [ + 100, + 200, + 300, + 400 + ] + ] + """ + + def to_numpy_ndarray(self) -> np.ndarray: + """ + Convert fixed shape tensor extension array to a multi-dimensional numpy.ndarray. + + The resulting ndarray will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + Ndarray representing tensors in the fixed shape tensor array concatenated + along the first dimension. + """ + def to_tensor(self) -> Tensor: + """ + Convert fixed shape tensor extension array to a pyarrow.Tensor. + + The resulting Tensor will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. 
+ + Returns + ------- + pyarrow.Tensor + Tensor representing tensors in the fixed shape tensor array concatenated + along the first dimension. + """ + + @classmethod + def from_numpy_ndarray(cls, obj: np.ndarray) -> Self: + """ + Convert numpy tensors (ndarrays) to a fixed shape tensor extension array. + The first dimension of ndarray will become the length of the fixed + shape tensor array. + If input array data is not contiguous a copy will be made. + + Parameters + ---------- + obj : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], dtype=np.float32) + >>> pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + + [ + [ + 1, + 2, + 3, + 4, + 5, + 6 + ], + [ + 1, + 2, + 3, + 4, + 5, + 6 + ] + ] + """ + +class OpaqueArray(ExtensionArray[_ArrayT]): + """ + Concrete class for opaque extension arrays. + + Examples + -------- + Define the extension type for an opaque array + + >>> import pyarrow as pa + >>> opaque_type = pa.opaque( + ... pa.binary(), + ... type_name="geometry", + ... vendor_name="postgis", + ... ) + + Create an extension array + + >>> arr = [None, b"data"] + >>> storage = pa.array(arr, pa.binary()) + >>> pa.ExtensionArray.from_storage(opaque_type, storage) + + [ + null, + 64617461 + ] + """ + +class Bool8Array(ExtensionArray): + """ + Concrete class for bool8 extension arrays. + + Examples + -------- + Define the extension type for an bool8 array + + >>> import pyarrow as pa + >>> bool8_type = pa.bool8() + + Create an extension array + + >>> arr = [-1, 0, 1, 2, None] + >>> storage = pa.array(arr, pa.int8()) + >>> pa.ExtensionArray.from_storage(bool8_type, storage) + + [ + -1, + 0, + 1, + 2, + null + ] + """ + + def to_numpy(self, zero_copy_only: bool = ..., writable: bool = ...) -> np.ndarray: + """ + Return a NumPy bool view or copy of this array. + + By default, tries to return a view of this array. This is only + supported for arrays without any nulls. + + Parameters + ---------- + zero_copy_only : bool, default True + If True, an exception will be raised if the conversion to a numpy + array would require copying the underlying data (e.g. in presence + of nulls). + writable : bool, default False + For numpy arrays created with zero copy (view on the Arrow data), + the resulting array is not writable (Arrow data is immutable). + By setting this to True, a copy of the array is made to ensure + it is writable. + + Returns + ------- + array : numpy.ndarray + """ + @classmethod + def from_storage(cls, storage: Int8Array) -> Self: # type: ignore[override] + """ + Construct Bool8Array from Int8Array storage. + + Parameters + ---------- + storage : Int8Array + The underlying storage for the result array. + + Returns + ------- + bool8_array : Bool8Array + """ + @classmethod + def from_numpy(cls, obj: np.ndarray) -> Self: + """ + Convert numpy array to a bool8 extension array without making a copy. + The input array must be 1-dimensional, with either bool_ or int8 dtype. + + Parameters + ---------- + obj : numpy.ndarray + + Returns + ------- + bool8_array : Bool8Array + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = np.array([True, False, True], dtype=np.bool_) + >>> pa.Bool8Array.from_numpy(arr) + + [ + 1, + 0, + 1 + ] + """ + +def concat_arrays(arrays: Iterable[_ArrayT], memory_pool: MemoryPool | None = None) -> _ArrayT: + """ + Concatenate the given arrays. + + The contents of the input arrays are copied into the returned array. 
+ + Raises + ------ + ArrowInvalid + If not all of the arrays have the same type. + + Parameters + ---------- + arrays : iterable of pyarrow.Array + Arrays to concatenate, must be identically typed. + memory_pool : MemoryPool, default None + For memory allocations. If None, the default pool is used. + + Examples + -------- + >>> import pyarrow as pa + >>> arr1 = pa.array([2, 4, 5, 100]) + >>> arr2 = pa.array([2, 4]) + >>> pa.concat_arrays([arr1, arr2]) + + [ + 2, + 4, + 5, + 100, + 2, + 4 + ] + + """ + +def _empty_array(type: _DataTypeT) -> Array[scalar.Scalar[_DataTypeT]]: + """ + Create empty array of the given type. + """ + +__all__ = [ + "array", + "asarray", + "nulls", + "repeat", + "infer_type", + "_PandasConvertible", + "Array", + "NullArray", + "BooleanArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "FixedSizeBinaryArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "BaseListArray", + "ListArray", + "LargeListArray", + "ListViewArray", + "LargeListViewArray", + "FixedSizeListArray", + "MapArray", + "UnionArray", + "StringArray", + "LargeStringArray", + "StringViewArray", + "BinaryArray", + "LargeBinaryArray", + "BinaryViewArray", + "DictionaryArray", + "StructArray", + "RunEndEncodedArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "FixedShapeTensorArray", + "concat_arrays", + "_empty_array", +] diff --git a/python/stubs/__lib_pxi/benchmark.pyi b/python/stubs/__lib_pxi/benchmark.pyi new file mode 100644 index 00000000000..66981bf0f51 --- /dev/null +++ b/python/stubs/__lib_pxi/benchmark.pyi @@ -0,0 +1 @@ +def benchmark_PandasObjectIsNull(list) -> None: ... # noqa: N802 diff --git a/python/stubs/__lib_pxi/builder.pyi b/python/stubs/__lib_pxi/builder.pyi new file mode 100644 index 00000000000..4a0e9ca4708 --- /dev/null +++ b/python/stubs/__lib_pxi/builder.pyi @@ -0,0 +1,89 @@ +from typing import Iterable + +from pyarrow.lib import MemoryPool, _Weakrefable + +from .array import StringArray, StringViewArray + +class StringBuilder(_Weakrefable): + """ + Builder class for UTF8 strings. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. + + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... 
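+
+# A minimal usage sketch for the builder above (illustrative only; it assumes the
+# concrete implementation is importable as ``pyarrow.lib.StringBuilder`` at runtime):
+#
+#   from pyarrow.lib import StringBuilder
+#
+#   builder = StringBuilder()
+#   builder.append("foo")                  # single value
+#   builder.append_values(["bar", None])   # bulk append; None becomes a null
+#   arr = builder.finish()                 # -> pyarrow.StringArray; builder is reset
+#   assert arr.null_count == 1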
+ +class StringViewBuilder(_Weakrefable): + """ + Builder class for UTF8 string views. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string_view'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. + + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringViewArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + +__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/stubs/__lib_pxi/compat.pyi b/python/stubs/__lib_pxi/compat.pyi new file mode 100644 index 00000000000..ae667be453e --- /dev/null +++ b/python/stubs/__lib_pxi/compat.pyi @@ -0,0 +1,5 @@ +def encode_file_path(path: str | bytes) -> bytes: ... +def tobytes(o: str | bytes) -> bytes: ... +def frombytes(o: bytes, *, safe: bool = False): ... + +__all__ = ["encode_file_path", "tobytes", "frombytes"] diff --git a/python/stubs/__lib_pxi/config.pyi b/python/stubs/__lib_pxi/config.pyi new file mode 100644 index 00000000000..166e10c9734 --- /dev/null +++ b/python/stubs/__lib_pxi/config.pyi @@ -0,0 +1,41 @@ +from typing import NamedTuple + +class VersionInfo(NamedTuple): + major: int + minor: int + patch: int + +class BuildInfo(NamedTuple): + version: str + version_info: VersionInfo + so_version: str + full_so_version: str + compiler_id: str + compiler_version: str + compiler_flags: str + git_id: str + git_description: str + package_kind: str + build_type: str + +class RuntimeInfo(NamedTuple): + simd_level: str + detected_simd_level: str + +cpp_build_info: BuildInfo +cpp_version: str +cpp_version_info: VersionInfo + +def runtime_info() -> RuntimeInfo: ... +def set_timezone_db_path(path: str) -> None: ... + +__all__ = [ + "VersionInfo", + "BuildInfo", + "RuntimeInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "set_timezone_db_path", +] diff --git a/python/stubs/__lib_pxi/device.pyi b/python/stubs/__lib_pxi/device.pyi new file mode 100644 index 00000000000..d1b9f39eedd --- /dev/null +++ b/python/stubs/__lib_pxi/device.pyi @@ -0,0 +1,88 @@ +import enum + +from pyarrow.lib import _Weakrefable + +class DeviceAllocationType(enum.Flag): + CPU = enum.auto() + CUDA = enum.auto() + CUDA_HOST = enum.auto() + OPENCL = enum.auto() + VULKAN = enum.auto() + METAL = enum.auto() + VPI = enum.auto() + ROCM = enum.auto() + ROCM_HOST = enum.auto() + EXT_DEV = enum.auto() + CUDA_MANAGED = enum.auto() + ONEAPI = enum.auto() + WEBGPU = enum.auto() + HEXAGON = enum.auto() + +class Device(_Weakrefable): + """ + Abstract interface for hardware devices + + This object represents a device with access to some memory spaces. + When handling a Buffer or raw memory address, it allows deciding in which + context the raw memory address should be interpreted + (e.g. CPU-accessible memory, or embedded memory on some particular GPU). 
+ """ + + @property + def type_name(self) -> str: + """ + A shorthand for this device's type. + """ + @property + def device_id(self) -> int: + """ + A device ID to identify this device if there are multiple of this type. + + If there is no "device_id" equivalent (such as for the main CPU device on + non-numa systems) returns -1. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this device is the main CPU device. + + This shorthand method is very useful when deciding whether a memory address + is CPU-accessible. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + Return the DeviceAllocationType of this device. + """ + +class MemoryManager(_Weakrefable): + """ + An object that provides memory management primitives. + + A MemoryManager is always tied to a particular Device instance. + It can also have additional parameters (such as a MemoryPool to + allocate CPU memory). + + """ + @property + def device(self) -> Device: + """ + The device this MemoryManager is tied to. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this MemoryManager is tied to the main CPU device. + + This shorthand method is very useful when deciding whether a memory + address is CPU-accessible. + """ + +def default_cpu_memory_manager() -> MemoryManager: + """ + Return the default CPU MemoryManager instance. + + The returned singleton instance uses the default MemoryPool. + """ + +__all__ = ["DeviceAllocationType", "Device", "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/stubs/__lib_pxi/error.pyi b/python/stubs/__lib_pxi/error.pyi new file mode 100644 index 00000000000..981ed51e680 --- /dev/null +++ b/python/stubs/__lib_pxi/error.pyi @@ -0,0 +1,53 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + +ArrowIOError = IOError + +class StopToken: ... + +def enable_signal_handlers(enable: bool) -> None: ... + +have_signal_refcycle: bool + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... 
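+
+# Illustrative sketch of how the hierarchy above composes with the built-in
+# exceptions (each Arrow error also derives from the closest standard one),
+# assuming a regular ``pyarrow`` installation:
+#
+#   import pyarrow as pa
+#
+#   assert issubclass(pa.ArrowInvalid, ValueError)
+#   assert issubclass(pa.ArrowKeyError, KeyError)
+#   assert issubclass(pa.ArrowNotImplementedError, NotImplementedError)
+#
+#   try:
+#       pa.array(["not a number"]).cast(pa.int64())
+#   except pa.ArrowInvalid:   # would also be caught by a plain ``except ValueError``
+#       pass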
+ +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/stubs/__lib_pxi/io.pyi b/python/stubs/__lib_pxi/io.pyi new file mode 100644 index 00000000000..d882fd79d57 --- /dev/null +++ b/python/stubs/__lib_pxi/io.pyi @@ -0,0 +1,1474 @@ +import sys + +from collections.abc import Callable +from io import IOBase + +from _typeshed import StrPath + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Any, Literal, SupportsIndex, overload + +from pyarrow._stubs_typing import Compression, SupportPyBuffer +from pyarrow.lib import MemoryPool, _Weakrefable + +from .device import Device, DeviceAllocationType, MemoryManager +from .types import KeyValueMetadata + +def have_libhdfs() -> bool: + """ + Return true if HDFS (HadoopFileSystem) library is set up correctly. + """ + +def io_thread_count() -> int: + """ + Return the number of threads to use for I/O operations. + + Many operations, such as scanning a dataset, will implicitly make + use of this pool. The number of threads is set to a fixed value at + startup. It can be modified at runtime by calling + :func:`set_io_thread_count()`. + + See Also + -------- + set_io_thread_count : Modify the size of this pool. + cpu_count : The analogous function for the CPU thread pool. + """ + +def set_io_thread_count(count: int) -> None: + """ + Set the number of threads to use for I/O operations. + + Many operations, such as scanning a dataset, will implicitly make + use of this pool. + + Parameters + ---------- + count : int + The max number of threads that may be used for I/O. + Must be positive. + + See Also + -------- + io_thread_count : Get the size of this pool. + set_cpu_count : The analogous function for the CPU thread pool. + """ + +Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"] + +class NativeFile(_Weakrefable): + """ + The base class for all Arrow streams. + + Streams are either readable, writable, or both. + They optionally support seeking. + + While this class exposes methods to read or write data from Python, the + primary intent of using a Arrow stream is to pass it to other Arrow + facilities that will make use of it, such as Arrow IPC routines. + + Be aware that there are subtle differences with regular Python files, + e.g. destroying a writable Arrow stream without closing it explicitly + will not flush any pending data. + """ + + _default_chunk_size: int + + def __enter__(self) -> Self: ... + def __exit__(self, *args) -> None: ... + @property + def mode(self) -> Mode: + """ + The file mode. Currently instances of NativeFile may support: + + * rb: binary read + * wb: binary write + * rb+: binary read and write + * ab: binary append + """ + def readable(self) -> bool: ... + def seekable(self) -> bool: ... + def isatty(self) -> bool: ... + def fileno(self) -> int: ... + @property + def closed(self) -> bool: ... + def close(self) -> None: ... 
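+
+    # Rough sketch of the lifecycle exposed by the methods above, using
+    # ``BufferReader`` as a concrete readable, seekable NativeFile:
+    #
+    #   import pyarrow as pa
+    #
+    #   with pa.BufferReader(b"abc") as f:
+    #       assert f.readable() and f.seekable()
+    #       assert not f.closed
+    #   assert f.closed            # __exit__ closes the stream
+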
+ def size(self) -> int: + """ + Return file size + """ + def metadata(self) -> KeyValueMetadata: + """ + Return file metadata + """ + def tell(self) -> int: + """ + Return current stream position + """ + def seek(self, position: int, whence: int = 0) -> int: + """ + Change current file stream position + + Parameters + ---------- + position : int + Byte offset, interpreted relative to value of whence argument + whence : int, default 0 + Point of reference for seek offset + + Notes + ----- + Values of whence: + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + + Returns + ------- + int + The new absolute stream position. + """ + def flush(self) -> None: + """ + Flush the stream, if applicable. + + An error is raised if stream is not writable. + """ + def write(self, data: bytes | SupportPyBuffer) -> int: + """ + Write data to the file. + + Parameters + ---------- + data : bytes-like object or exporter of buffer protocol + + Returns + ------- + int + nbytes: number of bytes written + """ + def read(self, nbytes: int | None = None) -> bytes: + """ + Read and return up to n bytes. + + If *nbytes* is None, then the entire remaining file contents are read. + + Parameters + ---------- + nbytes : int, default None + + Returns + ------- + data : bytes + """ + def get_stream(self, file_offset: int, nbytes: int) -> Self: + """ + Return an input stream that reads a file segment independent of the + state of the file. + + Allows reading portions of a random access file as an input stream + without interfering with each other. + + Parameters + ---------- + file_offset : int + nbytes : int + + Returns + ------- + stream : NativeFile + """ + def read_at(self) -> bytes: + """ + Read indicated number of bytes at offset from the file + + Parameters + ---------- + nbytes : int + offset : int + + Returns + ------- + data : bytes + """ + def read1(self) -> bytes: + """Read and return up to n bytes. + + Unlike read(), if *nbytes* is None then a chunk is read, not the + entire file. + + Parameters + ---------- + nbytes : int, default None + The maximum number of bytes to read. + + Returns + ------- + data : bytes + """ + def readall(self) -> bytes: ... + def readinto(self, b: SupportPyBuffer) -> int: + """ + Read into the supplied buffer + + Parameters + ---------- + b : buffer-like object + A writable buffer object (such as a bytearray). + + Returns + ------- + written : int + number of bytes written + """ + + def readline(self, size: int | None = None) -> bytes: + """Read and return a line of bytes from the file. + + If size is specified, read at most size bytes. + + Line terminator is always b"\\n". + + Parameters + ---------- + size : int + maximum number of bytes read + """ + def readlines(self, hint: int | None = None) -> list[bytes]: + """Read lines of the file + + Parameters + ---------- + hint : int + maximum number of bytes read until we stop + """ + def __iter__(self) -> Self: ... + def __next__(self) -> bytes: ... + def read_buffer(self, nbytes: int | None = None) -> Buffer: + """ + Read from buffer. + + Parameters + ---------- + nbytes : int, optional + maximum number of bytes read + """ + def truncate(self) -> None: ... + def writelines(self, lines: list[bytes]): + """ + Write lines to the file. 
+ + Parameters + ---------- + lines : iterable + Iterable of bytes-like objects or exporters of buffer protocol + """ + def download(self, stream_or_path: StrPath | IOBase, buffer_size: int | None = None) -> None: + """ + Read this file completely to a local path or destination stream. + + This method first seeks to the beginning of the file. + + Parameters + ---------- + stream_or_path : str or file-like object + If a string, a local file path to write to; otherwise, + should be a writable stream. + buffer_size : int, optional + The buffer size to use for data transfers. + """ + def upload(self, stream: IOBase, buffer_size: int | None) -> None: + """ + Write from a source stream to this file. + + Parameters + ---------- + stream : file-like object + Source stream to pipe to this file. + buffer_size : int, optional + The buffer size to use for data transfers. + """ + +# ---------------------------------------------------------------------- +# Python file-like objects + +class PythonFile(NativeFile): + """ + A stream backed by a Python file object. + + This class allows using Python file objects with arbitrary Arrow + functions, including functions written in another language than Python. + + As a downside, there is a non-zero redirection cost in translating + Arrow stream calls to Python method calls. Furthermore, Python's + Global Interpreter Lock may limit parallelism in some situations. + + Examples + -------- + >>> import io + >>> import pyarrow as pa + >>> pa.PythonFile(io.BytesIO()) + + + Create a stream for writing: + + >>> buf = io.BytesIO() + >>> f = pa.PythonFile(buf, mode="w") + >>> f.writable() + True + >>> f.write(b"PythonFile") + 10 + >>> buf.getvalue() + b'PythonFile' + >>> f.close() + >>> f + + + Create a stream for reading: + + >>> buf = io.BytesIO(b"PythonFile") + >>> f = pa.PythonFile(buf, mode="r") + >>> f.mode + 'rb' + >>> f.read() + b'PythonFile' + >>> f + + >>> f.close() + >>> f + + """ + def __init__(self, handle: IOBase, mode: Literal["r", "w"] | None = None) -> None: ... + def truncate(self, pos: int | None = None) -> None: + """ + Parameters + ---------- + pos : int, optional + """ + +class MemoryMappedFile(NativeFile): + """ + A stream that represents a memory-mapped file. + + Supports 'r', 'r+', 'w' modes. + + Examples + -------- + Create a new file with memory map: + + >>> import pyarrow as pa + >>> mmap = pa.create_memory_map("example_mmap.dat", 10) + >>> mmap + + >>> mmap.close() + + Open an existing file with memory map: + + >>> with pa.memory_map("example_mmap.dat") as mmap: + ... mmap + + """ + @classmethod + def create(cls, path: str, size: int) -> Self: + """ + Create a MemoryMappedFile + + Parameters + ---------- + path : str + Where to create the file. + size : int + Size of the memory mapped file. + """ + def _open(self, path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"): ... + def resize(self, new_size: int) -> None: + """ + Resize the map and underlying file. + + Parameters + ---------- + new_size : new size in bytes + """ + +def memory_map( + path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r" +) -> MemoryMappedFile: + """ + Open memory map at file path. Size of the memory map cannot change. + + Parameters + ---------- + path : str + mode : {'r', 'r+', 'w'}, default 'r' + Whether the file is opened for reading ('r'), writing ('w') + or both ('r+'). 
+ + Returns + ------- + mmap : MemoryMappedFile + + Examples + -------- + Reading from a memory map without any memory allocation or copying: + + >>> import pyarrow as pa + >>> with pa.output_stream("example_mmap.txt") as stream: + ... stream.write(b"Constructing a buffer referencing the mapped memory") + 51 + >>> with pa.memory_map("example_mmap.txt") as mmap: + ... mmap.read_at(6, 45) + b'memory' + """ + +create_memory_map = MemoryMappedFile.create + +class OSFile(NativeFile): + """ + A stream backed by a regular file descriptor. + + Examples + -------- + Create a new file to write to: + + >>> import pyarrow as pa + >>> with pa.OSFile("example_osfile.arrow", mode="w") as f: + ... f.writable() + ... f.write(b"OSFile") + ... f.seekable() + True + 6 + False + + Open the file to read: + + >>> with pa.OSFile("example_osfile.arrow", mode="r") as f: + ... f.mode + ... f.read() + 'rb' + b'OSFile' + + Open the file to append: + + >>> with pa.OSFile("example_osfile.arrow", mode="ab") as f: + ... f.mode + ... f.write(b" is super!") + 'ab' + 10 + >>> with pa.OSFile("example_osfile.arrow") as f: + ... f.read() + b'OSFile is super!' + + Inspect created OSFile: + + >>> pa.OSFile("example_osfile.arrow") + + """ + def __init__( + self, + path: str, + mode: Literal["r", "rb", "w", "wb", "a", "ab"], + memory_pool: MemoryPool | None = None, + ) -> None: ... + +class FixedSizeBufferWriter(NativeFile): + """ + A stream writing to a Arrow buffer. + + Examples + -------- + Create a stream to write to ``pyarrow.Buffer``: + + >>> import pyarrow as pa + >>> buf = pa.allocate_buffer(5) + >>> with pa.output_stream(buf) as stream: + ... stream.write(b"abcde") + ... stream + 5 + + + Inspect the buffer: + + >>> buf.to_pybytes() + b'abcde' + >>> buf + + """ + def __init__(self, buffer: Buffer) -> None: ... + def set_memcopy_threads(self, num_threads: int) -> None: ... + def set_memcopy_blocksize(self, blocksize: int) -> None: ... + def set_memcopy_threshold(self, threshold: int) -> None: ... + +# ---------------------------------------------------------------------- +# Arrow buffers + +class Buffer(_Weakrefable): + """ + The base class for all Arrow buffers. + + A buffer represents a contiguous memory area. Many buffers will own + their memory, though not all of them do. + """ + def __len__(self) -> int: ... + def _assert_cpu(self) -> None: ... + @property + def size(self) -> int: + """ + The buffer size in bytes. + """ + @property + def address(self) -> int: + """ + The buffer's address, as an integer. + + The returned address may point to CPU or device memory. + Use `is_cpu()` to disambiguate. + """ + def hex(self) -> bytes: + """ + Compute hexadecimal representation of the buffer. + + Returns + ------- + : bytes + """ + @property + def is_mutable(self) -> bool: + """ + Whether the buffer is mutable. + """ + @property + def is_cpu(self) -> bool: + """ + Whether the buffer is CPU-accessible. + """ + @property + def device(self) -> Device: + """ + The device where the buffer resides. + + Returns + ------- + Device + """ + @property + def memory_manager(self) -> MemoryManager: + """ + The memory manager associated with the buffer. + + Returns + ------- + MemoryManager + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the buffer resides. + + Returns + ------- + DeviceAllocationType + """ + @property + def parent(self) -> Buffer | None: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + @overload + def __getitem__(self, key: int) -> int: ... 
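+
+    # Sketch of the indexing behaviour declared by the overloads above: an integer
+    # key yields a single byte value, while a slice yields another Buffer viewing
+    # the same memory (no copy). Assumes a regular ``pyarrow`` installation:
+    #
+    #   import pyarrow as pa
+    #
+    #   buf = pa.py_buffer(b"abcdef")
+    #   assert buf[0] == ord("a")                 # int key -> int
+    #   assert buf[1:4].to_pybytes() == b"bcd"    # slice key -> Buffer (zero-copy view)
+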
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Slice this buffer. Memory is not copied. + + You can also use the Python slice notation ``buffer[start:stop]``. + + Parameters + ---------- + offset : int, default 0 + Offset from start of buffer to slice. + length : int, default None + Length of slice (default is until end of Buffer starting from + offset). + + Returns + ------- + sliced : Buffer + A logical view over this buffer. + """ + def equals(self, other: Self) -> bool: + """ + Determine if two buffers contain exactly the same data. + + Parameters + ---------- + other : Buffer + + Returns + ------- + are_equal : bool + True if buffer contents and size are equal + """ + def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]: ... + def to_pybytes(self) -> bytes: + """ + Return this buffer as a Python bytes object. Memory is copied. + """ + def __buffer__(self, flags: int, /) -> memoryview: ... + +class ResizableBuffer(Buffer): + """ + A base class for buffers that can be resized. + """ + + def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: + """ + Resize buffer to indicated size. + + Parameters + ---------- + new_size : int + New size of buffer (padding may be added internally). + shrink_to_fit : bool, default False + If this is true, the buffer is shrunk when new_size is less + than the current size. + If this is false, the buffer is never shrunk. + """ + +@overload +def allocate_buffer(size: int, memory_pool: MemoryPool | None = None) -> Buffer: ... +@overload +def allocate_buffer( + size: int, memory_pool: MemoryPool | None, resizable: Literal[False] +) -> Buffer: ... +@overload +def allocate_buffer( + size: int, memory_pool: MemoryPool | None, resizable: Literal[True] +) -> ResizableBuffer: ... +def allocate_buffer(*args, **kwargs): + """ + Allocate a mutable buffer. + + Parameters + ---------- + size : int + Number of bytes to allocate (plus internal padding) + memory_pool : MemoryPool, optional + The pool to allocate memory from. + If not given, the default memory pool is used. + resizable : bool, default False + If true, the returned buffer is resizable. + + Returns + ------- + buffer : Buffer or ResizableBuffer + """ + +# ---------------------------------------------------------------------- +# Arrow Stream +class BufferOutputStream(NativeFile): + """ + An output stream that writes to a resizable buffer. + + The buffer is produced as a result when ``getvalue()`` is called. + + Examples + -------- + Create an output stream, write data to it and finalize it with + ``getvalue()``: + + >>> import pyarrow as pa + >>> f = pa.BufferOutputStream() + >>> f.write(b"pyarrow.Buffer") + 14 + >>> f.closed + False + >>> f.getvalue() + + >>> f.closed + True + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def getvalue(self) -> Buffer: + """ + Finalize output stream and return result as pyarrow.Buffer. + + Returns + ------- + value : Buffer + """ + +class MockOutputStream(NativeFile): ... + +class BufferReader(NativeFile): + """ + Zero-copy reader from objects convertible to Arrow buffer. + + Parameters + ---------- + obj : Python bytes or pyarrow.Buffer + + Examples + -------- + Create an Arrow input stream and inspect it: + + >>> import pyarrow as pa + >>> data = b"reader data" + >>> buf = memoryview(data) + >>> with pa.input_stream(buf) as stream: + ... stream.size() + ... stream.read(6) + ... stream.seek(7) + ... 
stream.read(15) + 11 + b'reader' + 7 + b'data' + """ + def __init__(self, obj) -> None: ... + +class CompressedInputStream(NativeFile): + """ + An input stream wrapper which decompresses data on the fly. + + Parameters + ---------- + stream : string, path, pyarrow.NativeFile, or file-like object + Input stream object to wrap with the compression. + compression : str + The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). + + Examples + -------- + Create an output stream which compresses the data: + + >>> import pyarrow as pa + >>> data = b"Compressed stream" + >>> raw = pa.BufferOutputStream() + >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: + ... compressed.write(data) + 17 + + Create an input stream with decompression referencing the + buffer with compressed data: + + >>> cdata = raw.getvalue() + >>> with pa.input_stream(cdata, compression="gzip") as compressed: + ... compressed.read() + b'Compressed stream' + + which actually translates to the use of ``BufferReader``and + ``CompressedInputStream``: + + >>> raw = pa.BufferReader(cdata) + >>> with pa.CompressedInputStream(raw, "gzip") as compressed: + ... compressed.read() + b'Compressed stream' + """ + + def __init__( + self, + stream: StrPath | NativeFile | IOBase, + compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], + ) -> None: ... + +class CompressedOutputStream(NativeFile): + """ + An output stream wrapper which compresses data on the fly. + + Parameters + ---------- + stream : string, path, pyarrow.NativeFile, or file-like object + Input stream object to wrap with the compression. + compression : str + The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). + + Examples + -------- + Create an output stream which compresses the data: + + >>> import pyarrow as pa + >>> data = b"Compressed stream" + >>> raw = pa.BufferOutputStream() + >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: + ... compressed.write(data) + 17 + """ + def __init__( + self, + stream: StrPath | NativeFile | IOBase, + compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], + ) -> None: ... + +class BufferedInputStream(NativeFile): + """ + An input stream that performs buffered reads from + an unbuffered input stream, which can mitigate the overhead + of many small reads in some cases. + + Parameters + ---------- + stream : NativeFile + The input stream to wrap with the buffer + buffer_size : int + Size of the temporary read buffer. + memory_pool : MemoryPool + The memory pool used to allocate the buffer. + """ + def __init__( + self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None + ) -> None: ... + def detach(self) -> NativeFile: + """ + Release the raw InputStream. + Further operations on this stream are invalid. + + Returns + ------- + raw : NativeFile + The underlying raw input stream + """ + +class BufferedOutputStream(NativeFile): + """ + An output stream that performs buffered reads from + an unbuffered output stream, which can mitigate the overhead + of many small writes in some cases. + + Parameters + ---------- + stream : NativeFile + The writable output stream to wrap with the buffer + buffer_size : int + Size of the buffer that should be added. + memory_pool : MemoryPool + The memory pool used to allocate the buffer. + """ + def __init__( + self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None + ) -> None: ... + def detach(self) -> NativeFile: + """ + Flush any buffered writes and release the raw OutputStream. 
+ Further operations on this stream are invalid. + + Returns + ------- + raw : NativeFile + The underlying raw output stream. + """ + +class TransformInputStream(NativeFile): + """ + Transform an input stream. + + Parameters + ---------- + stream : NativeFile + The stream to transform. + transform_func : callable + The transformation to apply. + """ + def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: ... + +class Transcoder: + def __init__(self, decoder, encoder) -> None: ... + def __call__(self, buf: Buffer): ... + +def transcoding_input_stream( + stream: NativeFile, src_encoding: str, dest_encoding: str +) -> TransformInputStream: + """ + Add a transcoding transformation to the stream. + Incoming data will be decoded according to ``src_encoding`` and + then re-encoded according to ``dest_encoding``. + + Parameters + ---------- + stream : NativeFile + The stream to which the transformation should be applied. + src_encoding : str + The codec to use when reading data. + dest_encoding : str + The codec to use for emitted data. + """ + +def py_buffer(obj: SupportPyBuffer) -> Buffer: + """ + Construct an Arrow buffer from a Python bytes-like or buffer-like object + + Parameters + ---------- + obj : object + the object from which the buffer should be constructed. + """ + +def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: + """ + Construct an Arrow buffer with the given *address* and *size*. + + The buffer will be optionally backed by the Python *base* object, if given. + The *base* object will be kept alive as long as this buffer is alive, + including across language boundaries (for example if the buffer is + referenced by C++ code). + + Parameters + ---------- + address : int + The starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + The size of device buffer in bytes. + base : {None, object} + Object that owns the referenced memory. + """ + +def as_buffer(o: Buffer | SupportPyBuffer) -> Buffer: ... + +# --------------------------------------------------------------------- + +class CacheOptions(_Weakrefable): + """ + Cache options for a pre-buffered fragment scan. + + Parameters + ---------- + hole_size_limit : int, default 8KiB + The maximum distance in bytes between two consecutive ranges; beyond + this value, ranges are not combined. + range_size_limit : int, default 32MiB + The maximum size in bytes of a combined range; if combining two + consecutive ranges would produce a range of a size greater than this, + they are not combined + lazy : bool, default True + lazy = false: request all byte ranges when PreBuffer or WillNeed is called. + lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + needs them. + lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + range that is currently being read. + prefetch_limit : int, default 0 + The maximum number of ranges to be prefetched. This is only used for + lazy cache to asynchronously read some ranges after reading the target + range. + """ + + hole_size_limit: int + range_size_limit: int + lazy: bool + prefetch_limit: int + def __init__( + self, + *, + hole_size_limit: int | None = None, + range_size_limit: int | None = None, + lazy: bool = True, + prefetch_limit: int = 0, + ) -> None: ... 
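+
+    # Rough construction sketch (the numbers are arbitrary placeholders, not
+    # recommendations); such an object is typically passed to a pre-buffered scan,
+    # e.g. via ``pyarrow.dataset.ParquetFragmentScanOptions(cache_options=...)``:
+    #
+    #   import pyarrow as pa
+    #
+    #   opts = pa.CacheOptions(
+    #       hole_size_limit=4 * 1024,           # merge ranges closer than 4 KiB
+    #       range_size_limit=16 * 1024 * 1024,  # but never beyond 16 MiB per request
+    #       lazy=True,
+    #       prefetch_limit=2,                   # read ahead up to two merged ranges
+    #   )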
+ @classmethod + def from_network_metrics( + cls, + time_to_first_byte_millis: int, + transfer_bandwidth_mib_per_sec: int, + ideal_bandwidth_utilization_frac: float = 0.9, + max_ideal_request_size_mib: int = 64, + ) -> Self: + """ + Create suitable CacheOptions based on provided network metrics. + + Typically this will be used with object storage solutions like Amazon S3, + Google Cloud Storage and Azure Blob Storage. + + Parameters + ---------- + time_to_first_byte_millis : int + Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call + setup latency of a new read request. The value is a positive integer. + transfer_bandwidth_mib_per_sec : int + Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive + integer. + ideal_bandwidth_utilization_frac : int, default 0.9 + Transfer bandwidth utilization fraction (per connection) to maximize the net + data load. The value is a positive float less than 1. + max_ideal_request_size_mib : int, default 64 + The maximum single data request size (in MiB) to maximize the net data load. + + Returns + ------- + CacheOptions + """ + +class Codec(_Weakrefable): + """ + Compression codec. + + Parameters + ---------- + compression : str + Type of compression codec to initialize, valid values are: 'gzip', + 'bz2', 'brotli', 'lz4' (or 'lz4_frame'), 'lz4_raw', 'zstd' and + 'snappy'. + compression_level : int, None + Optional parameter specifying how aggressively to compress. The + possible ranges and effect of this parameter depend on the specific + codec chosen. Higher values compress more but typically use more + resources (CPU/RAM). Some codecs support negative values. + + gzip + The compression_level maps to the memlevel parameter of + deflateInit2. Higher levels use more RAM but are faster + and should have higher compression ratios. + + bz2 + The compression level maps to the blockSize100k parameter of + the BZ2_bzCompressInit function. Higher levels use more RAM + but are faster and should have higher compression ratios. + + brotli + The compression level maps to the BROTLI_PARAM_QUALITY + parameter. Higher values are slower and should have higher + compression ratios. + + lz4/lz4_frame/lz4_raw + The compression level parameter is not supported and must + be None + + zstd + The compression level maps to the compressionLevel parameter + of ZSTD_initCStream. Negative values are supported. Higher + values are slower and should have higher compression ratios. + + snappy + The compression level parameter is not supported and must + be None + + + Raises + ------ + ValueError + If invalid compression value is passed. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.Codec.is_available("gzip") + True + >>> codec = pa.Codec("gzip") + >>> codec.name + 'gzip' + >>> codec.compression_level + 9 + """ + def __init__(self, compression: Compression, compression_level: int | None = None) -> None: ... + @classmethod + def detect(cls, path: StrPath) -> Self: + """ + Detect and instantiate compression codec based on file extension. + + Parameters + ---------- + path : str, path-like + File-path to detect compression from. + + Raises + ------ + TypeError + If the passed value is not path-like. + ValueError + If the compression can't be detected from the path. + + Returns + ------- + Codec + """ + @staticmethod + def is_available(compression: Compression) -> bool: + """ + Returns whether the compression support has been built and enabled. 
+ + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + + Returns + ------- + bool + """ + @staticmethod + def supports_compression_level(compression: Compression) -> int: + """ + Returns true if the compression level parameter is supported + for the given codec. + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def default_compression_level(compression: Compression) -> int: + """ + Returns the compression level that Arrow will use for the codec if + None is specified. + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def minimum_compression_level(compression: Compression) -> int: + """ + Returns the smallest valid value for the compression level + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def maximum_compression_level(compression: Compression) -> int: + """ + Returns the largest valid value for the compression level + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @property + def name(self) -> Compression: + """Returns the name of the codec""" + @property + def compression_level(self) -> int: + """Returns the compression level parameter of the codec""" + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, + ) -> bytes: ... + def compress(self, *args, **kwargs): + """ + Compress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any + + Returns + ------- + compressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, + ) -> bytes: ... + def decompress(self, *args, **kwargs): + """ + Decompress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or memoryview-compatible object + decompressed_size : int, default None + Size of the decompressed result + asbytes : boolean, default False + Return result as Python bytes object, otherwise Buffer + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. 
+ + Returns + ------- + uncompressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, +) -> bytes: ... +def compress(*args, **kwargs): + """ + Compress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol + codec : str, default 'lz4' + Compression codec. + Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer. + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. + + Returns + ------- + compressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, +) -> bytes: ... +def decompress(*args, **kwargs): + """ + Decompress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or memoryview-compatible object + Input object to decompress data from. + decompressed_size : int, default None + Size of the decompressed result + codec : str, default 'lz4' + Compression codec. + Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer. + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. + + Returns + ------- + uncompressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +def input_stream( + source: StrPath | Buffer | IOBase, + compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", + buffer_size: int | None = None, +) -> BufferReader: + """ + Create an Arrow input stream. + + Parameters + ---------- + source : str, Path, buffer, or file-like object + The source to open for reading. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly decompression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. + Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). + buffer_size : int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. 
+ + Examples + -------- + Create a readable BufferReader (NativeFile) from a Buffer or a memoryview object: + + >>> import pyarrow as pa + >>> buf = memoryview(b"some data") + >>> with pa.input_stream(buf) as stream: + ... stream.read(4) + b'some' + + Create a readable OSFile (NativeFile) from a string or file path: + + >>> import gzip + >>> with gzip.open("example.gz", "wb") as f: + ... f.write(b"some data") + 9 + >>> with pa.input_stream("example.gz") as stream: + ... stream.read() + b'some data' + + Create a readable PythonFile (NativeFile) from a a Python file object: + + >>> with open("example.txt", mode="w") as f: + ... f.write("some text") + 9 + >>> with pa.input_stream("example.txt") as stream: + ... stream.read(6) + b'some t' + """ + +def output_stream( + source: StrPath | Buffer | IOBase, + compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", + buffer_size: int | None = None, +) -> NativeFile: + """ + Create an Arrow output stream. + + Parameters + ---------- + source : str, Path, buffer, file-like object + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. + Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). + buffer_size : int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + + Examples + -------- + Create a writable NativeFile from a pyarrow Buffer: + + >>> import pyarrow as pa + >>> data = b"buffer data" + >>> empty_obj = bytearray(11) + >>> buf = pa.py_buffer(empty_obj) + >>> with pa.output_stream(buf) as stream: + ... stream.write(data) + 11 + >>> with pa.input_stream(buf) as stream: + ... stream.read(6) + b'buffer' + + or from a memoryview object: + + >>> buf = memoryview(empty_obj) + >>> with pa.output_stream(buf) as stream: + ... stream.write(data) + 11 + >>> with pa.input_stream(buf) as stream: + ... stream.read() + b'buffer data' + + Create a writable NativeFile from a string or file path: + + >>> with pa.output_stream("example_second.txt") as stream: + ... stream.write(b"Write some data") + 15 + >>> with pa.input_stream("example_second.txt") as stream: + ... 
stream.read() + b'Write some data' + """ + +__all__ = [ + "have_libhdfs", + "io_thread_count", + "set_io_thread_count", + "NativeFile", + "PythonFile", + "MemoryMappedFile", + "memory_map", + "create_memory_map", + "OSFile", + "FixedSizeBufferWriter", + "Buffer", + "ResizableBuffer", + "allocate_buffer", + "BufferOutputStream", + "MockOutputStream", + "BufferReader", + "CompressedInputStream", + "CompressedOutputStream", + "BufferedInputStream", + "BufferedOutputStream", + "TransformInputStream", + "Transcoder", + "transcoding_input_stream", + "py_buffer", + "foreign_buffer", + "as_buffer", + "CacheOptions", + "Codec", + "compress", + "decompress", + "input_stream", + "output_stream", +] diff --git a/python/stubs/__lib_pxi/ipc.pyi b/python/stubs/__lib_pxi/ipc.pyi new file mode 100644 index 00000000000..3d72892061e --- /dev/null +++ b/python/stubs/__lib_pxi/ipc.pyi @@ -0,0 +1,705 @@ +import enum +import sys + +from io import IOBase + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Iterable, Iterator, Literal, Mapping, NamedTuple + +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer +from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable + +from .io import Buffer, Codec, NativeFile +from .types import DictionaryMemo, KeyValueMetadata + +class MetadataVersion(enum.IntEnum): + V1 = enum.auto() + V2 = enum.auto() + V3 = enum.auto() + V4 = enum.auto() + V5 = enum.auto() + +class WriteStats(NamedTuple): + """IPC write statistics + + Parameters + ---------- + num_messages : int + Number of messages. + num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class ReadStats(NamedTuple): + """IPC read statistics + + Parameters + ---------- + num_messages : int + Number of messages. + num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class IpcReadOptions(_Weakrefable): + """ + Serialization options for reading IPC format. + + Parameters + ---------- + ensure_native_endian : bool, default True + Whether to convert incoming data to platform-native endianness. + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like decompression + included_fields : list + If empty (the default), return all deserialized fields. + If non-empty, the values are the indices of fields to read on + the top-level schema + """ + + ensure_native_endian: bool + use_threads: bool + included_fields: list[int] + def __init__( + self, + *, + ensure_native_endian: bool = True, + use_threads: bool = True, + included_fields: list[int] | None = None, + ) -> None: ... + +class IpcWriteOptions(_Weakrefable): + """ + Serialization options for the IPC format. + + Parameters + ---------- + metadata_version : MetadataVersion, default MetadataVersion.V5 + The metadata version to write. 
V5 is the current and latest, + V4 is the pre-1.0 metadata version (with incompatible Union layout). + allow_64bit : bool, default False + If true, allow field lengths that don't fit in a signed 32-bit int. + use_legacy_format : bool, default False + Whether to use the pre-Arrow 0.15 IPC format. + compression : str, Codec, or None + compression codec to use for record batch buffers. + If None then batch buffers will be uncompressed. + Must be "lz4", "zstd" or None. + To specify a compression_level use `pyarrow.Codec` + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like compression. + emit_dictionary_deltas : bool + Whether to emit dictionary deltas. Default is false for maximum + stream compatibility. + unify_dictionaries : bool + If true then calls to write_table will attempt to unify dictionaries + across all batches in the table. This can help avoid the need for + replacement dictionaries (which the file format does not support) + but requires computing the unified dictionary and then remapping + the indices arrays. + + This parameter is ignored when writing to the IPC stream format as + the IPC stream format can support replacement dictionaries. + """ + + metadata_version: MetadataVersion + allow_64bit: bool + use_legacy_format: bool + compression: Codec | Literal["lz4", "zstd"] | None + use_threads: bool + emit_dictionary_deltas: bool + unify_dictionaries: bool + def __init__( + self, + *, + metadata_version: MetadataVersion = MetadataVersion.V5, + allow_64bit: bool = False, + use_legacy_format: bool = False, + compression: Codec | Literal["lz4", "zstd"] | None = None, + use_threads: bool = True, + emit_dictionary_deltas: bool = False, + unify_dictionaries: bool = False, + ) -> None: ... + +class Message(_Weakrefable): + """ + Container for an Arrow IPC message with metadata and optional body + """ + + @property + def type(self) -> str: ... + @property + def metadata(self) -> Buffer: ... + @property + def metadata_version(self) -> MetadataVersion: ... + @property + def body(self) -> Buffer | None: ... + def equals(self, other: Message) -> bool: ... + def serialize_to( + self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None + ): + """ + Write message to generic OutputStream + + Parameters + ---------- + sink : NativeFile + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + """ + def serialize(self, alignment: int = 8, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write message as encapsulated IPC message + + Parameters + ---------- + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + """ + +class MessageReader(_Weakrefable): + """ + Interface for reading Message objects from some source (like an + InputStream) + """ + @classmethod + def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: + """ + Open stream from source, if you want to use memory map use + MemoryMappedFile as source. + + Parameters + ---------- + source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object + A readable source, like an InputStream + """ + def __iter__(self) -> Self: ... + def read_next_message(self) -> Message: + """ + Read next Message from the stream. 
+ + Raises + ------ + StopIteration + At end of stream + """ + __next__ = read_next_message + +# ---------------------------------------------------------------------- +# File and stream readers and writers + +class _CRecordBatchWriter(_Weakrefable): + """The base RecordBatchWriter wrapper. + + Provides common implementations of convenience methods. Should not + be instantiated directly by user code. + """ + def write(self, table_or_batch: Table | RecordBatch): + """ + Write RecordBatch or Table to stream. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + """ + def write_batch( + self, + batch: RecordBatch, + custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None, + ): + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + custom_metadata : mapping or KeyValueMetadata + Keys and values must be string-like / coercible to bytes + """ + def write_table(self, table: Table, max_chunksize: int | None = None) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def stats(self) -> WriteStats: + """ + Current IPC write statistics. + """ + +class _RecordBatchStreamWriter(_CRecordBatchWriter): + def __dealloc__(self) -> None: ... + def _open(self, sink, schema: Schema, options: IpcWriteOptions = IpcWriteOptions()): ... + +class _ReadPandasMixin: + def read_pandas(self, **options) -> pd.DataFrame: + """ + Read contents of stream to a pandas.DataFrame. + + Read all record batches as a pyarrow.Table then convert it to a + pandas.DataFrame using Table.to_pandas. + + Parameters + ---------- + **options + Arguments to forward to :meth:`Table.to_pandas`. + + Returns + ------- + df : pandas.DataFrame + """ + +class RecordBatchReader(_Weakrefable): + """Base class for reading stream of record batches. + + Record batch readers function as iterators of record batches that also + provide the schema (without the need to get any batches). + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatchReader.from_*`` functions instead. + + Notes + ----- + To import and export using the Arrow C stream interface, use the + ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this + interface is intended for expert users. + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([("x", pa.int64())]) + >>> def iter_record_batches(): + ... for i in range(2): + ... yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema) + >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches()) + >>> print(reader.schema) + x: int64 + >>> for batch in reader: + ... print(batch) + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + """ + + def __iter__(self) -> Self: ... + def read_next_batch(self) -> RecordBatch: + """ + Read next RecordBatch from the stream. + + Raises + ------ + StopIteration: + At end of stream. 
+ + Returns + ------- + RecordBatch + """ + __next__ = read_next_batch + @property + def schema(self) -> Schema: + """ + Shared schema of the record batches in the stream. + + Returns + ------- + Schema + """ + def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: + """ + Read next RecordBatch from the stream along with its custom metadata. + + Raises + ------ + StopIteration: + At end of stream. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def iter_batches_with_custom_metadata( + self, + ) -> Iterator[RecordBatchWithMetadata]: + """ + Iterate over record batches from the stream along with their custom + metadata. + + Yields + ------ + RecordBatchWithMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table. + + Returns + ------- + Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def close(self) -> None: + """ + Release any resources associated with the reader. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + def cast(self, target_schema: Schema) -> Self: + """ + Wrap this reader with one that casts each batch lazily as it is pulled. + Currently only a safe cast to target_schema is implemented. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + + Returns + ------- + RecordBatchReader + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowArrayStream struct, given its pointer. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + Be careful: if you don't pass the ArrowArrayStream struct to a + consumer, array memory will leak. This is a low-level function + intended for expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream struct, + given its pointer. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_stream(cls, data: SupportArrowStream, schema: Schema | None = None) -> Self: + """ + Create RecordBatchReader from a Arrow-compatible stream object. + + This accepts objects implementing the Arrow PyCapsule Protocol for + streams, i.e. objects that have a ``__arrow_c_stream__`` method. + + Parameters + ---------- + data : Arrow-compatible stream object + Any object that implements the Arrow PyCapsule Protocol for + streams. + schema : Schema, default None + The schema to which the stream should be casted, if supported + by the stream object. 
+ + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: + """ + Create RecordBatchReader from an iterable of batches. + + Parameters + ---------- + schema : Schema + The shared schema of the record batches + batches : Iterable[RecordBatch] + The batches that this reader will return. + + Returns + ------- + reader : RecordBatchReader + """ + +class _RecordBatchStreamReader(RecordBatchReader): + @property + def stats(self) -> ReadStats: + """ + Current IPC read statistics. + """ + +class _RecordBatchFileWriter(_RecordBatchStreamWriter): ... + +class RecordBatchWithMetadata(NamedTuple): + """RecordBatch with its custom metadata + + Parameters + ---------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + + batch: RecordBatch + custom_metadata: KeyValueMetadata + +class _RecordBatchFileReader(_Weakrefable): + @property + def num_record_batches(self) -> int: + """ + The number of record batches in the IPC file. + """ + def get_batch(self, i: int) -> RecordBatch: + """ + Read the record batch with the given index. + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + """ + get_record_batch = get_batch + def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: + """ + Read the record batch with the given index along with + its custom metadata + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def schema(self) -> Schema: ... + @property + def stats(self) -> ReadStats: ... + +def get_tensor_size(tensor: Tensor) -> int: + """ + Return total size of serialized Tensor including metadata and padding. + + Parameters + ---------- + tensor : Tensor + The tensor for which we want to known the size. + """ + +def get_record_batch_size(batch: RecordBatch) -> int: + """ + Return total size of serialized RecordBatch including metadata and padding. + + Parameters + ---------- + batch : RecordBatch + The recordbatch for which we want to know the size. + """ + +def write_tensor(tensor: Tensor, dest: NativeFile) -> int: + """ + Write pyarrow.Tensor to pyarrow.NativeFile object its current position. + + Parameters + ---------- + tensor : pyarrow.Tensor + dest : pyarrow.NativeFile + + Returns + ------- + bytes_written : int + Total number of bytes written to the file + """ + +def read_tensor(source: NativeFile) -> Tensor: + """Read pyarrow.Tensor from pyarrow.NativeFile object from current + position. If the file source supports zero copy (e.g. a memory map), then + this operation does not allocate any memory. 
This function not assume that + the stream is aligned + + Parameters + ---------- + source : pyarrow.NativeFile + + Returns + ------- + tensor : Tensor + + """ + +def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: + """ + Read length-prefixed message from file or buffer-like object + + Parameters + ---------- + source : pyarrow.NativeFile, file-like object, or buffer-like object + + Returns + ------- + message : Message + """ + +def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | None = None) -> Schema: + """ + Read Schema from message or buffer + + Parameters + ---------- + obj : buffer or Message + dictionary_memo : DictionaryMemo, optional + Needed to be able to reconstruct dictionary-encoded fields + with read_record_batch + + Returns + ------- + schema : Schema + """ + +def read_record_batch( + obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None +) -> RecordBatch: + """ + Read RecordBatch from message, given a known schema. If reading data from a + complete IPC stream, use ipc.open_stream instead + + Parameters + ---------- + obj : Message or Buffer-like + schema : Schema + dictionary_memo : DictionaryMemo, optional + If message contains dictionaries, must pass a populated + DictionaryMemo + + Returns + ------- + batch : RecordBatch + """ + +__all__ = [ + "MetadataVersion", + "WriteStats", + "ReadStats", + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "_CRecordBatchWriter", + "_RecordBatchStreamWriter", + "_ReadPandasMixin", + "RecordBatchReader", + "_RecordBatchStreamReader", + "_RecordBatchFileWriter", + "RecordBatchWithMetadata", + "_RecordBatchFileReader", + "get_tensor_size", + "get_record_batch_size", + "write_tensor", + "read_tensor", + "read_message", + "read_schema", + "read_record_batch", +] diff --git a/python/stubs/__lib_pxi/memory.pyi b/python/stubs/__lib_pxi/memory.pyi new file mode 100644 index 00000000000..57a3bb4f1b3 --- /dev/null +++ b/python/stubs/__lib_pxi/memory.pyi @@ -0,0 +1,174 @@ +from pyarrow.lib import _Weakrefable + +class MemoryPool(_Weakrefable): + """ + Base class for memory allocation. + + Besides tracking its number of allocated bytes, a memory pool also + takes care of the required 64-byte alignment for Arrow data. + """ + + def release_unused(self) -> None: + """ + Attempt to return to the OS any memory being held onto by the pool. + + This function should not be called except potentially for + benchmarking or debugging as it could be expensive and detrimental to + performance. + + This is best effort and may not have any effect on some memory pools + or in some situations (e.g. fragmentation). + """ + def bytes_allocated(self) -> int: + """ + Return the number of bytes that are currently allocated from this + memory pool. + """ + def total_bytes_allocated(self) -> int: + """ + Return the total number of bytes that have been allocated from this + memory pool. + """ + def max_memory(self) -> int | None: + """ + Return the peak memory allocation in this memory pool. + This can be an approximate number in multi-threaded applications. + + None is returned if the pool implementation doesn't know how to + compute this number. + """ + def num_allocations(self) -> int: + """ + Return the number of allocations or reallocations that were made + using this memory pool. + """ + def print_stats(self) -> None: + """ + Print statistics about this memory pool. + + The output format is implementation-specific. 
Not all memory pools + implement this method. + """ + @property + def backend_name(self) -> str: + """ + The name of the backend used by this MemoryPool (e.g. "jemalloc"). + """ + +class LoggingMemoryPool(MemoryPool): ... +class ProxyMemoryPool(MemoryPool): ... + +def default_memory_pool() -> MemoryPool: + """ + Return the process-global memory pool. + + Examples + -------- + >>> default_memory_pool() + + """ + +def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: + """ + Create and return a MemoryPool instance that redirects to the + *parent*, but with separate allocation statistics. + + Parameters + ---------- + parent : MemoryPool + The real memory pool that should be used for allocations. + """ + +def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: + """ + Create and return a MemoryPool instance that redirects to the + *parent*, but also dumps allocation logs on stderr. + + Parameters + ---------- + parent : MemoryPool + The real memory pool that should be used for allocations. + """ + +def system_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the C malloc heap. + """ + +def jemalloc_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the jemalloc heap. + + NotImplementedError is raised if jemalloc support is not enabled. + """ + +def mimalloc_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the mimalloc heap. + + NotImplementedError is raised if mimalloc support is not enabled. + """ + +def set_memory_pool(pool: MemoryPool) -> None: + """ + Set the default memory pool. + + Parameters + ---------- + pool : MemoryPool + The memory pool that should be used by default. + """ + +def log_memory_allocations(enable: bool = True) -> None: + """ + Enable or disable memory allocator logging for debugging purposes + + Parameters + ---------- + enable : bool, default True + Pass False to disable logging + """ + +def total_allocated_bytes() -> int: + """ + Return the currently allocated bytes from the default memory pool. + Other memory pools may not be accounted for. + """ + +def jemalloc_set_decay_ms(decay_ms: int) -> None: + """ + Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to indicated number of + milliseconds. A value of 0 (the default) results in dirty / muzzy memory + pages being released right away to the OS, while a higher value will result + in a time-based decay. See the jemalloc docs for more information + + It's best to set this at the start of your application. + + Parameters + ---------- + decay_ms : int + Number of milliseconds to set for jemalloc decay conf parameters. 
Note + that this change will only affect future memory arenas + """ + +def supported_memory_backends() -> list[str]: + """ + Return a list of available memory pool backends + """ + +__all__ = [ + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "default_memory_pool", + "proxy_memory_pool", + "logging_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "set_memory_pool", + "log_memory_allocations", + "total_allocated_bytes", + "jemalloc_set_decay_ms", + "supported_memory_backends", +] diff --git a/python/stubs/__lib_pxi/pandas_shim.pyi b/python/stubs/__lib_pxi/pandas_shim.pyi new file mode 100644 index 00000000000..0e80fae4ebf --- /dev/null +++ b/python/stubs/__lib_pxi/pandas_shim.pyi @@ -0,0 +1,51 @@ +from types import ModuleType +from typing import Any, Iterable, TypeGuard + +import pandas as pd + +from numpy import dtype +from pandas.core.dtypes.base import ExtensionDtype + +class _PandasAPIShim: + has_sparse: bool + + def series(self, *args, **kwargs) -> pd.Series: ... + def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + @property + def have_pandas(self) -> bool: ... + @property + def compat(self) -> ModuleType: ... + @property + def pd(self) -> ModuleType: ... + def infer_dtype(self, obj: Iterable) -> str: ... + def pandas_dtype(self, dtype: str) -> dtype: ... + @property + def loose_version(self) -> Any: ... + @property + def version(self) -> str: ... + def is_v1(self) -> bool: ... + def is_ge_v21(self) -> bool: ... + def is_ge_v23(self) -> bool: ... + def is_ge_v3(self) -> bool: ... + @property + def categorical_type(self) -> type[pd.Categorical]: ... + @property + def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + @property + def extension_dtype(self) -> type[ExtensionDtype]: ... + def is_array_like( + self, obj: Any + ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... + def is_sparse(self, obj: Any) -> bool: ... + def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... + def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def get_values(self, obj: Any) -> bool: ... + def get_rangeindex_attribute(self, level, name): ... + +_pandas_api: _PandasAPIShim + +__all__ = ["_PandasAPIShim", "_pandas_api"] diff --git a/python/stubs/__lib_pxi/scalar.pyi b/python/stubs/__lib_pxi/scalar.pyi new file mode 100644 index 00000000000..81ab5012067 --- /dev/null +++ b/python/stubs/__lib_pxi/scalar.pyi @@ -0,0 +1,1017 @@ +import collections.abc +import datetime as dt +import sys + +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Any, Generic, Iterator, Literal, Mapping, overload + +import numpy as np + +from pyarrow._compute import CastOptions +from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable +from typing_extensions import Protocol, TypeVar + +from . 
import types +from .types import ( + _AsPyType, + _DataTypeT, + _Time32Unit, + _Time64Unit, + _Tz, + _Unit, +) + +_AsPyTypeK = TypeVar("_AsPyTypeK") +_AsPyTypeV = TypeVar("_AsPyTypeV") +_DataType_co = TypeVar("_DataType_co", bound=types.DataType, covariant=True) + +class Scalar(_Weakrefable, Generic[_DataType_co]): + """ + The base class for scalars. + """ + @property + def type(self) -> _DataType_co: + """ + Data type of the Scalar object. + """ + @property + def is_valid(self) -> bool: + """ + Holds a valid (non-null) value. + """ + @overload + def cast( + self, + target_type: None, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Self: ... + @overload + def cast( + self, + target_type: _DataTypeT, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Scalar[_DataTypeT]: ... + def cast(self, *args, **kwargs): + """ + Cast scalar value to another data type. + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, default None + Type to cast scalar to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Returns + ------- + scalar : A Scalar of the given target data type. + """ + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def equals(self, other: Scalar) -> bool: ... + def __hash__(self) -> int: ... + @overload + def as_py( + self: Scalar[types._BasicDataType[_AsPyType]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> _AsPyType: ... + @overload + def as_py( + self: Scalar[types.ListType[types._BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType]: ... + @overload + def as_py( + self: Scalar[ + types.ListType[ + types.DictionaryType[types._IndexT, types._BasicDataType[_AsPyTypeV], Any] + ] + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[int, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[ + types.ListType[types.DictionaryType[Any, types._BasicDataType[_AsPyTypeV], Any]], + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[Any, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[types.ListType[types.DictionaryType[types._IndexT, Any, Any]],], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[int, Any]]: ... + @overload + def as_py( + self: Scalar[types.StructType], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[str, Any]]: ... + @overload + def as_py( + self: Scalar[ + types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[_AsPyTypeK, _AsPyTypeV]]: ... 
+ @overload + def as_py( + self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[Any, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[_AsPyTypeK, Any]]: ... + @overload + def as_py( + self: Scalar[Any], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> Any: ... + def as_py(self, *args, **kwargs): + """ + Return this value as a Python representation. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + """ + +_NULL: TypeAlias = None +NA = _NULL + +class NullScalar(Scalar[types.NullType]): ... +class BooleanScalar(Scalar[types.BoolType]): ... +class UInt8Scalar(Scalar[types.UInt8Type]): ... +class Int8Scalar(Scalar[types.Int8Type]): ... +class UInt16Scalar(Scalar[types.UInt16Type]): ... +class Int16Scalar(Scalar[types.Int16Type]): ... +class UInt32Scalar(Scalar[types.Uint32Type]): ... +class Int32Scalar(Scalar[types.Int32Type]): ... +class UInt64Scalar(Scalar[types.UInt64Type]): ... +class Int64Scalar(Scalar[types.Int64Type]): ... +class HalfFloatScalar(Scalar[types.Float16Type]): ... +class FloatScalar(Scalar[types.Float32Type]): ... +class DoubleScalar(Scalar[types.Float64Type]): ... +class Decimal32Scalar(Scalar[types.Decimal32Type[types._Precision, types._Scale]]): ... +class Decimal64Scalar(Scalar[types.Decimal64Type[types._Precision, types._Scale]]): ... +class Decimal128Scalar(Scalar[types.Decimal128Type[types._Precision, types._Scale]]): ... +class Decimal256Scalar(Scalar[types.Decimal256Type[types._Precision, types._Scale]]): ... +class Date32Scalar(Scalar[types.Date32Type]): ... + +class Date64Scalar(Scalar[types.Date64Type]): + @property + def value(self) -> dt.date | None: ... + +class Time32Scalar(Scalar[types.Time32Type[_Time32Unit]]): + @property + def value(self) -> dt.time | None: ... + +class Time64Scalar(Scalar[types.Time64Type[_Time64Unit]]): + @property + def value(self) -> dt.time | None: ... + +class TimestampScalar(Scalar[types.TimestampType[_Unit, _Tz]]): + @property + def value(self) -> int | None: ... + +class DurationScalar(Scalar[types.DurationType[_Unit]]): + @property + def value(self) -> dt.timedelta | None: ... + +class MonthDayNanoIntervalScalar(Scalar[types.MonthDayNanoIntervalType]): + @property + def value(self) -> MonthDayNano | None: ... + +class BinaryScalar(Scalar[types.BinaryType]): + def as_buffer(self) -> Buffer: ... + +class LargeBinaryScalar(Scalar[types.LargeBinaryType]): + def as_buffer(self) -> Buffer: ... + +class FixedSizeBinaryScalar(Scalar[types.FixedSizeBinaryType]): + def as_buffer(self) -> Buffer: ... + +class StringScalar(Scalar[types.StringType]): + def as_buffer(self) -> Buffer: ... + +class LargeStringScalar(Scalar[types.LargeStringType]): + def as_buffer(self) -> Buffer: ... 
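# Illustrative usage sketch (not part of the patch): the runtime behaviour that the
# scalar classes stubbed above are meant to describe for type checkers. Names and
# values here are assumptions for illustration; it presumes a reasonably recent
# pyarrow (the `maps_as_pydicts` keyword only exists in newer releases).
import pyarrow as pa

s = pa.scalar("hello")                  # concrete class at runtime: pyarrow.StringScalar
assert isinstance(s, pa.StringScalar)
assert s.is_valid and s.type == pa.string()
assert s.as_py() == "hello"             # the as_py() overloads narrow this to `str`

buf = pa.scalar(b"raw").as_buffer()     # BinaryScalar exposes its payload as a Buffer
assert buf.to_pybytes() == b"raw"

f = pa.scalar(7).cast(pa.float64())     # cast() re-types the scalar, here to a DoubleScalar
assert f.as_py() == 7.0

m = pa.scalar([("k", 1)], type=pa.map_(pa.string(), pa.int64()))
assert m.as_py() == [("k", 1)]                         # default: association list
assert m.as_py(maps_as_pydicts="strict") == {"k": 1}   # opt-in dict conversion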
+ +class BinaryViewScalar(Scalar[types.BinaryViewType]): + def as_buffer(self) -> Buffer: ... + +class StringViewScalar(Scalar[types.StringViewType]): + def as_buffer(self) -> Buffer: ... + +class ListScalar(Scalar[types.ListType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class FixedSizeListScalar(Scalar[types.FixedSizeListType[_DataTypeT, types._Size]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class LargeListScalar(Scalar[types.LargeListType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class ListViewScalar(Scalar[types.ListViewType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class LargeListViewScalar(Scalar[types.LargeListViewType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar]): + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[str]: ... + def __getitem__(self, __key: str) -> Scalar[Any]: ... # type: ignore[override] + def _as_py_tuple(self) -> list[tuple[str, Any]]: ... + +class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> tuple[Scalar[types._K], types._ValueT, Any]: ... + @overload + def __iter__( + self: Scalar[ + types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] + ], + ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]]: ... + @overload + def __iter__( + self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]],], + ) -> Iterator[tuple[Any, _AsPyTypeV]]: ... + @overload + def __iter__( + self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any],], + ) -> Iterator[tuple[_AsPyTypeK, Any]]: ... + +class DictionaryScalar(Scalar[types.DictionaryType[types._IndexT, types._BasicValueT]]): + @property + def index(self) -> Scalar[types._IndexT]: ... + @property + def value(self) -> Scalar[types._BasicValueT]: ... + @property + def dictionary(self) -> Array: ... + +class RunEndEncodedScalar(Scalar[types.RunEndEncodedType[types._RunEndType, types._BasicValueT]]): + @property + def value(self) -> tuple[int, types._BasicValueT] | None: ... + +class UnionScalar(Scalar[types.UnionType]): + @property + def value(self) -> Any | None: ... + @property + def type_code(self) -> str: ... + +class ExtensionScalar(Scalar[types.ExtensionType]): + @property + def value(self) -> Any | None: ... + @staticmethod + def from_storage(typ: types.BaseExtensionType, value) -> ExtensionScalar: + """ + Construct ExtensionScalar from type and storage value. + + Parameters + ---------- + typ : DataType + The extension type for the result scalar. + value : object + The storage value for the result scalar. 
+ + Returns + ------- + ext_scalar : ExtensionScalar + """ + +class Bool8Scalar(Scalar[types.Bool8Type]): ... +class UuidScalar(Scalar[types.UuidType]): ... +class JsonScalar(Scalar[types.JsonType]): ... +class OpaqueScalar(Scalar[types.OpaqueType]): ... + +class FixedShapeTensorScalar(ExtensionScalar): + def to_numpy(self) -> np.ndarray: + """ + Convert fixed shape tensor scalar to a numpy.ndarray. + + The resulting ndarray's shape matches the permuted shape of the + fixed shape tensor scalar. + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + """ + def to_tensor(self) -> Tensor: + """ + Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape + and strides derived from corresponding FixedShapeTensorType. + + The conversion is zero-copy. + + Returns + ------- + pyarrow.Tensor + Tensor represented stored in FixedShapeTensorScalar. + """ + +_V = TypeVar("_V") + +class NullableCollection(Protocol[_V]): # pyright: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... + +@overload +def scalar( + value: str, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringScalar: ... +@overload +def scalar( + value: bytes, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryScalar: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: bool, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BooleanScalar: ... +@overload +def scalar( + value: int, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int64Scalar: ... +@overload +def scalar( + value: float, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DoubleScalar: ... +@overload +def scalar( + value: Decimal, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal128Scalar: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: dt.datetime, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> TimestampScalar[Literal["us"]]: ... +@overload +def scalar( + value: dt.date, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date32Scalar: ... +@overload +def scalar( + value: dt.time, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time64Scalar[Literal["us"]]: ... +@overload +def scalar( + value: dt.timedelta, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DurationScalar[Literal["us"]]: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: MonthDayNano, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalScalar: ... +@overload +def scalar( + value: Mapping[str, Any], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StructScalar: ... +@overload +def scalar( + value: NullableCollection[str], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.StringType]]: ... +@overload +def scalar( + value: NullableCollection[bytes], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.BinaryType]]: ... 
+@overload +def scalar( + value: NullableCollection[bool], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.BoolType]]: ... +@overload +def scalar( + value: NullableCollection[int], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Int64Type]]: ... +@overload +def scalar( + value: NullableCollection[float], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Float64Type]]: ... +@overload +def scalar( + value: NullableCollection[Decimal], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Decimal32Type]]: ... +@overload +def scalar( + value: NullableCollection[dt.datetime], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.TimestampType[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[dt.date], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Date32Type]]: ... +@overload +def scalar( + value: NullableCollection[dt.time], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Time64Type[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[dt.timedelta], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.DurationType[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[MonthDayNano], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.MonthDayNanoIntervalType]]: ... +@overload +def scalar( + value: NullableCollection[Any], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[Any]: ... +@overload +def scalar( + value: Any, + type: types.NullType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> NullScalar: ... +@overload +def scalar( + value: Any, + type: types.BoolType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BooleanScalar: ... +@overload +def scalar( + value: Any, + type: types.UInt8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt8Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int8Scalar: ... +@overload +def scalar( + value: Any, + type: types.UInt16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt16Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int16Scalar: ... +@overload +def scalar( + value: Any, + type: types.Uint32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt32Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int32Scalar: ... +@overload +def scalar( + value: Any, + type: types.UInt64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt64Scalar: ... 
+@overload +def scalar( + value: Any, + type: types.Int64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int64Scalar: ... +@overload +def scalar( + value: Any, + type: types.Float16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> HalfFloatScalar: ... +@overload +def scalar( + value: Any, + type: types.Float32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> FloatScalar: ... +@overload +def scalar( + value: Any, + type: types.Float64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DoubleScalar: ... +@overload +def scalar( + value: Any, + type: types.Date32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date32Scalar: ... +@overload +def scalar( + value: Any, + type: types.Date64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date64Scalar: ... +@overload +def scalar( + value: Any, + type: types.MonthDayNanoIntervalType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalScalar: ... +@overload +def scalar( + value: Any, + type: types.StringType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringScalar: ... +@overload +def scalar( + value: Any, + type: types.LargeStringType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeStringScalar: ... +@overload +def scalar( + value: Any, + type: types.StringViewType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringViewScalar: ... +@overload +def scalar( + value: Any, + type: types.BinaryType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryScalar: ... +@overload +def scalar( + value: Any, + type: types.LargeBinaryType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryScalar: ... +@overload +def scalar( + value: Any, + type: types.BinaryViewType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryViewScalar: ... +@overload +def scalar( + value: Any, + type: types.TimestampType[types._Unit, types._Tz], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> TimestampScalar[types._Unit, types._Tz]: ... +@overload +def scalar( + value: Any, + type: types.Time32Type[types._Time32Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time32Scalar[types._Time32Unit]: ... +@overload +def scalar( + value: Any, + type: types.Time64Type[types._Time64Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time64Scalar[types._Time64Unit]: ... +@overload +def scalar( + value: Any, + type: types.DurationType[types._Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DurationScalar[types._Unit]: ... +@overload +def scalar( + value: Any, + type: types.Decimal32Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal32Scalar[types._Precision, types._Scale]: ... 
+@overload +def scalar( + value: Any, + type: types.Decimal64Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal64Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.Decimal128Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal128Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.Decimal256Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal256Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.ListType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.LargeListType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeListScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.ListViewType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListViewScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.LargeListViewType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeListViewScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.FixedSizeListType[_DataTypeT, types._Size], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> FixedSizeListScalar[_DataTypeT, types._Size]: ... +@overload +def scalar( + value: Any, + type: types.DictionaryType[types._IndexT, types._BasicValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DictionaryScalar[types._IndexT, types._BasicValueT]: ... +@overload +def scalar( + value: Any, + type: types.MapType[types._K, types._ValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MapScalar[types._K, types._ValueT]: ... +@overload +def scalar( + value: Any, + type: types.StructType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StructScalar: ... +@overload +def scalar( + value: Any, + type: types.UnionType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UnionScalar: ... +@overload +def scalar( + value: Any, + type: types.RunEndEncodedType[types._RunEndType, types._BasicValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedScalar[types._RunEndType, types._BasicValueT]: ... +@overload +def scalar( + value: Any, + type: types.Bool8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Bool8Scalar: ... +@overload +def scalar( + value: Any, + type: types.UuidType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UuidScalar: ... +@overload +def scalar( + value: Any, + type: types.JsonType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> JsonScalar: ... +@overload +def scalar( + value: Any, + type: types.OpaqueType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> OpaqueScalar: ... 
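# Illustrative usage sketch (not part of the patch): how the explicit `type=` overloads
# of scalar() in this chain are expected to map onto concrete scalar classes at runtime.
# The examples and values are assumptions for illustration, using only public pyarrow API.
import datetime as dt
from decimal import Decimal

import pyarrow as pa

ts = pa.scalar(dt.datetime(2024, 1, 1, tzinfo=dt.timezone.utc),
               type=pa.timestamp("ms", tz="UTC"))
# -> pyarrow.TimestampScalar; the stubs additionally carry the "ms" unit for type checkers.

dec = pa.scalar(Decimal("1.23"), type=pa.decimal128(5, 2))
# -> pyarrow.Decimal128Scalar, parameterised by precision/scale in the stubs.

lst = pa.scalar([1, 2, 3], type=pa.list_(pa.int16()))
# -> pyarrow.ListScalar; element access is typed as Scalar[Int16Type].
assert lst[0].as_py() == 1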
+@overload +def scalar( + value: Any, + type: _DataTypeT, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Scalar[_DataTypeT]: ... +def scalar(*args, **kwargs): + """ + Create a pyarrow.Scalar instance from a Python object. + + Parameters + ---------- + value : Any + Python object coercible to arrow's type system. + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred from + the value. + from_pandas : bool, default None + Use pandas's semantics for inferring nulls from values in + ndarray-like data. Defaults to False if not passed explicitly by user, + or True if a pandas object is passed in. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Returns + ------- + scalar : pyarrow.Scalar + + Examples + -------- + >>> import pyarrow as pa + + >>> pa.scalar(42) + + + >>> pa.scalar("string") + + + >>> pa.scalar([1, 2]) + + + >>> pa.scalar([1, 2], type=pa.list_(pa.int16())) + + """ + +__all__ = [ + "Scalar", + "_NULL", + "NA", + "NullScalar", + "BooleanScalar", + "UInt8Scalar", + "Int8Scalar", + "UInt16Scalar", + "Int16Scalar", + "UInt32Scalar", + "Int32Scalar", + "UInt64Scalar", + "Int64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + "Time64Scalar", + "TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "FixedSizeBinaryScalar", + "StringScalar", + "LargeStringScalar", + "BinaryViewScalar", + "StringViewScalar", + "ListScalar", + "FixedSizeListScalar", + "LargeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "StructScalar", + "MapScalar", + "DictionaryScalar", + "RunEndEncodedScalar", + "UnionScalar", + "ExtensionScalar", + "FixedShapeTensorScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "scalar", +] diff --git a/python/stubs/__lib_pxi/table.pyi b/python/stubs/__lib_pxi/table.pyi new file mode 100644 index 00000000000..ad9d0392137 --- /dev/null +++ b/python/stubs/__lib_pxi/table.pyi @@ -0,0 +1,5609 @@ +import datetime as dt +import sys + +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import ( + Any, + Collection, + Generator, + Generic, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + TypeVar, + overload, +) + +import numpy as np +import pandas as pd + +from numpy.typing import NDArray +from pyarrow._compute import ( + CastOptions, + CountOptions, + FunctionOptions, + ScalarAggregateOptions, + TDigestOptions, + VarianceOptions, +) +from pyarrow._stubs_typing import ( + Indices, + Mask, + NullEncoding, + NullSelectionBehavior, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportArrowStream, +) +from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.interchange.dataframe import _PyArrowDataFrame +from pyarrow.lib import Device, Field, MemoryManager, MemoryPool, MonthDayNano, Schema + +from . 
import array, scalar, types +from .array import Array, NullableCollection, StructArray, _CastAs, _PandasConvertible +from .device import DeviceAllocationType +from .io import Buffer +from .ipc import RecordBatchReader +from .scalar import Int64Scalar, Scalar +from .tensor import Tensor +from .types import _AsPyType, _BasicDataType, _DataTypeT + +_ScalarT = TypeVar("_ScalarT", bound=Scalar) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) + +_Aggregation: TypeAlias = Literal[ + "all", + "any", + "approximate_median", + "count", + "count_all", + "count_distinct", + "distinct", + "first", + "first_last", + "last", + "list", + "max", + "mean", + "min", + "min_max", + "one", + "product", + "stddev", + "sum", + "tdigest", + "variance", +] +_AggregationPrefixed: TypeAlias = Literal[ + "hash_all", + "hash_any", + "hash_approximate_median", + "hash_count", + "hash_count_all", + "hash_count_distinct", + "hash_distinct", + "hash_first", + "hash_first_last", + "hash_last", + "hash_list", + "hash_max", + "hash_mean", + "hash_min", + "hash_min_max", + "hash_one", + "hash_product", + "hash_stddev", + "hash_sum", + "hash_tdigest", + "hash_variance", +] +Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed +AggregateOptions: TypeAlias = ( + ScalarAggregateOptions | CountOptions | TDigestOptions | VarianceOptions | FunctionOptions +) + +UnarySelector: TypeAlias = str +NullarySelector: TypeAlias = tuple[()] +NarySelector: TypeAlias = list[str] | tuple[str, ...] +ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector + +class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + """ + An array-like composed from a (possibly empty) collection of pyarrow.Arrays + + Warnings + -------- + Do not call this class's constructor directly. + + Examples + -------- + To construct a ChunkedArray object use :func:`pyarrow.chunked_array`: + + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> isinstance(pa.chunked_array([[2, 2, 4], [4, 5, 100]]), pa.ChunkedArray) + True + """ + + @property + def data(self) -> Self: ... + @property + def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: + """ + Return data type of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + """ + def length(self) -> int: + """ + Return length of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.length() + 6 + """ + __len__ = length + def to_string( + self, + *, + indent: int = 0, + window: int = 5, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: + """ + Render a "pretty-printed" string representation of the ChunkedArray + + Parameters + ---------- + indent : int + How much to indent right the content of the array, + by default ``0``. + window : int + How many items to preview within each chunk at the begin and end + of the chunk when the chunk is bigger than the window. + The other elements will be ellipsed. + container_window : int + How many chunks to preview at the begin and end + of the array when the array is bigger than the window. + The other elements will be ellipsed. + This setting also applies to list columns. 
+ skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_string(skip_new_lines=True) + '[[2,2,4],[4,5,100]]' + """ + format = to_string + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + @property + def null_count(self) -> int: + """ + Number of null entries + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.null_count + 1 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the chunked array. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.nbytes + 49 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the chunked array. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.get_total_buffer_size() + 49 + """ + def __sizeof__(self) -> int: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + @overload + def __getitem__(self, key: int) -> _Scalar_co: ... + def __getitem__(self, key): + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or ChunkedArray (slice) + """ + def getitem(self, i: int) -> Scalar: ... + def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_null() + + [ + [ + false, + false, + false, + false, + true, + false + ] + ] + """ + def is_nan(self) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the NaN values. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) + >>> arr.is_nan() + + [ + [ + false, + true, + false, + false, + null, + false + ] + ] + """ + def is_valid(self) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the non-null values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_valid() + + [ + [ + true, + true, + true + ], + [ + true, + false, + true + ] + ] + """ + def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: + """ + Replace each null element in values with fill_value. + + See :func:`pyarrow.compute.fill_null` for full usage. + + Parameters + ---------- + fill_value : any + The replacement value for null entries. + + Returns + ------- + result : Array or ChunkedArray + A new array with nulls replaced by the given value. + + Examples + -------- + >>> import pyarrow as pa + >>> fill_value = pa.scalar(5, type=pa.int8()) + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.fill_null(fill_value) + + [ + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + ] + """ + def equals(self, other: Self) -> bool: + """ + Return whether the contents of two chunked arrays are equal. + + Parameters + ---------- + other : pyarrow.ChunkedArray + Chunked array to compare against. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> n_legs.equals(n_legs) + True + >>> n_legs.equals(animals) + False + """ + def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: + """ + Return a NumPy copy of this array (experimental). + + Parameters + ---------- + zero_copy_only : bool, default False + Introduced for signature consistence with pyarrow.Array.to_numpy. + This must be False here since NumPy arrays' buffer must be contiguous. + + Returns + ------- + array : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_numpy() + array([ 2, 2, 4, 4, 5, 100]) + """ + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + @overload + def cast( + self, + target_type: None = None, + safe: bool | None = None, + options: CastOptions | None = None, + ) -> Self: ... + @overload + def cast( + self, target_type: _CastAs, safe: bool | None = None, options: CastOptions | None = None + ) -> ChunkedArray[Scalar[_CastAs]]: ... + def cast(self, *args, **kwargs): + """ + Cast array values to another data type + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. 
+ options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + + Change the data type of an array: + + >>> n_legs_seconds = n_legs.cast(pa.duration("s")) + >>> n_legs_seconds.type + DurationType(duration[s]) + """ + def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: + """ + Compute dictionary-encoded representation of array. + + See :func:`pyarrow.compute.dictionary_encode` for full usage. + + Parameters + ---------- + null_encoding : str, default "mask" + How to handle null entries. + + Returns + ------- + encoded : ChunkedArray + A dictionary-encoded version of this array. + + Examples + -------- + >>> import pyarrow as pa + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> animals.dictionary_encode() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: + """ + Flatten this ChunkedArray. If it has a struct type, the column is + flattened into one array per struct field. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : list of ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> c_arr = pa.chunked_array(n_legs.value_counts()) + >>> c_arr + + [ + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + ] + >>> c_arr.flatten() + [ + [ + [ + 2, + 4, + 5, + 100 + ] + ], + [ + [ + 2, + 2, + 1, + 1 + ] + ]] + >>> c_arr.type + StructType(struct) + >>> n_legs.type + DataType(int64) + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: + """ + Flatten this ChunkedArray into a single non-chunked array. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.combine_chunks() + + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + """ + def unique(self) -> ChunkedArray[_Scalar_co]: + """ + Compute distinct elements in array + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.unique() + + [ + 2, + 4, + 5, + 100 + ] + """ + def value_counts(self) -> StructArray: + """ + Compute counts of unique elements in array. 
+ + Returns + ------- + An array of structs + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.value_counts() + + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this ChunkedArray + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.slice(2, 2) + + [ + [ + 4 + ], + [ + 4 + ] + ] + """ + def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.filter` for full usage. + + Parameters + ---------- + mask : Array or array-like + The boolean mask to filter the chunked array with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled. + + Returns + ------- + filtered : Array or ChunkedArray + An array of the same type, with only the elements selected by + the boolean mask. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> mask = pa.array([True, False, None, True, False, True]) + >>> n_legs.filter(mask) + + [ + [ + 2 + ], + [ + 4, + 100 + ] + ] + >>> n_legs.filter(mask, null_selection_behavior="emit_null") + + [ + [ + 2, + null + ], + [ + 4, + 100 + ] + ] + """ + @overload + def index( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + value: Scalar[_DataTypeT] | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + @overload + def index( + self, + value: Scalar[_DataTypeT], + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + def index(self, *args, **kwargs): + """ + Find the first index of a value. + + See :func:`pyarrow.compute.index` for full usage. + + Parameters + ---------- + value : Scalar or object + The value to look for in the array. + start : int, optional + The start index where to look for `value`. + end : int, optional + The end index where to look for `value`. + memory_pool : MemoryPool, optional + A memory pool for potential memory allocations. + + Returns + ------- + index : Int64Scalar + The index of the value in the array (-1 if not found). + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.index(4) + + >>> n_legs.index(4, start=3) + + """ + def take(self, indices: Indices) -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the array whose values will be returned. 
+ + Returns + ------- + taken : Array or ChunkedArray + An array with the same datatype, containing the taken values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.take([1, 4, 5]) + + [ + [ + 2, + 5, + 100 + ] + ] + """ + def drop_null(self) -> Self: + """ + Remove missing values from a chunked array. + See :func:`pyarrow.compute.drop_null` for full description. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.drop_null() + + [ + [ + 2, + 2 + ], + [ + 4, + 5, + 100 + ] + ] + """ + def sort(self, order: Order = "ascending", **kwargs) -> Self: + """ + Sort the ChunkedArray + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : ChunkedArray + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent chunked array, but where all + chunks share the same dictionary values. Dictionary indices are + transposed accordingly. + + If there are no dictionaries in the chunked array, it is returned + unchanged. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> c_arr + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ] + ] + >>> c_arr.unify_dictionaries() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + @property + def num_chunks(self) -> int: + """ + Number of underlying chunks. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.num_chunks + 2 + """ + def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: + """ + Select a chunk by its index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.chunk(1) + + [ + 4, + 5, + 100 + ] + """ + @property + def chunks(self) -> list[Array[_Scalar_co]]: + """ + Convert to a list of single-chunked arrays. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.chunks + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ]] + """ + @overload + def iterchunks( + self: ChunkedArray[scalar.NullScalar], + ) -> Generator[array.NullArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.BooleanScalar], + ) -> Generator[array.BooleanArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt8Scalar], + ) -> Generator[array.UInt8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int8Scalar], + ) -> Generator[array.Int8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt16Scalar], + ) -> Generator[array.UInt16Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int16Scalar], + ) -> Generator[array.Int16Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt32Scalar], + ) -> Generator[array.UInt32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int32Scalar], + ) -> Generator[array.Int32Array, None, None]: + """ + Convert to an iterator of ChunkArrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> for i in n_legs.iterchunks(): + ... print(i.null_count) + 0 + 1 + + """ + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt64Scalar], + ) -> Generator[array.UInt64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int64Scalar], + ) -> Generator[array.Int64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.HalfFloatScalar], + ) -> Generator[array.HalfFloatArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FloatScalar], + ) -> Generator[array.FloatArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DoubleScalar], + ) -> Generator[array.DoubleArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal32Scalar], + ) -> Generator[array.Decimal32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal64Scalar], + ) -> Generator[array.Decimal64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal128Scalar], + ) -> Generator[array.Decimal128Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal256Scalar], + ) -> Generator[array.Decimal256Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Date32Scalar], + ) -> Generator[array.Date32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Date64Scalar], + ) -> Generator[array.Date64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Time32Scalar[types._Time32Unit]], + ) -> Generator[array.Time32Array[types._Time32Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Time64Scalar[types._Time64Unit]], + ) -> Generator[array.Time64Array[types._Time64Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DurationScalar[types._Unit]], + ) -> Generator[array.DurationArray[types._Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.MonthDayNanoIntervalScalar], + ) -> Generator[array.MonthDayNanoIntervalArray, None, None]: ... 
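A short sketch of how the `iterchunks()` overloads dispatch on the scalar parameter of `self`, assuming the stubs are installed; the unit literal is expected to survive chunk iteration. The expected types in the comments are assumed checker output, not runtime behaviour.

    import pyarrow as pa

    # Integer seconds coerced to time32[s]; expected static type:
    # ChunkedArray[Time32Scalar[Literal["s"]]]
    times = pa.chunked_array([[1, 2], [3]], type="time32[s]")
    for chunk in times.iterchunks():
        # expected narrowing per the overload above: Time32Array[Literal["s"]]
        print(chunk.type)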
+ @overload + def iterchunks( + self: ChunkedArray[scalar.BinaryScalar], + ) -> Generator[array.BinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeBinaryScalar], + ) -> Generator[array.LargeBinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FixedSizeBinaryScalar], + ) -> Generator[array.FixedSizeBinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StringScalar], + ) -> Generator[array.StringArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeStringScalar], + ) -> Generator[array.LargeStringArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.BinaryViewScalar], + ) -> Generator[array.BinaryViewArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StringViewScalar], + ) -> Generator[array.StringViewArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.ListScalar[_DataTypeT]], + ) -> Generator[array.ListArray[scalar.ListScalar[_DataTypeT]], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FixedSizeListScalar[_DataTypeT, types._Size]], + ) -> Generator[array.FixedSizeListArray[_DataTypeT, types._Size], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeListScalar[_DataTypeT]], + ) -> Generator[array.LargeListArray[_DataTypeT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeListViewScalar[_DataTypeT]], + ) -> Generator[array.LargeListViewArray[_DataTypeT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StructScalar], + ) -> Generator[array.StructArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.MapScalar[array._MapKeyT, array._MapItemT]], + ) -> Generator[array.MapArray[array._MapKeyT, array._MapItemT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DictionaryScalar[types._IndexT, types._BasicValueT]], + ) -> Generator[array.DictionaryArray[types._IndexT, types._BasicValueT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.RunEndEncodedScalar], + ) -> Generator[array.RunEndEncodedArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UnionScalar], + ) -> Generator[array.UnionArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Bool8Scalar], + ) -> Generator[array.Bool8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UuidScalar], + ) -> Generator[array.UuidArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.JsonScalar], + ) -> Generator[array.JsonArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.OpaqueScalar], + ) -> Generator[array.OpaqueArray, None, None]: ... + def iterchunks(self): + """ + Convert to an iterator of ChunkArrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> for i in n_legs.iterchunks(): + ... print(i.null_count) + 0 + 1 + + """ + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_pylist( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: + """ + Convert to a list of native Python objects. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. 
+ The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.to_pylist() + [2, 2, 4, 4, None, 100] + """ + def __arrow_c_stream__(self, requested_schema=None) -> Any: + """ + Export to a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import ChunkedArray from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + ChunkedArray + """ + @property + def is_cpu(self) -> bool: + """ + Whether all chunks in the ChunkedArray are CPU-accessible. + """ + +@overload +def chunked_array( + values: Iterable[NullableCollection[bool]], + type: None = None, +) -> ChunkedArray[scalar.BooleanScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[int]], + type: None = None, +) -> ChunkedArray[scalar.Int64Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[float]], + type: None = None, +) -> ChunkedArray[scalar.DoubleScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[Decimal]], + type: None = None, +) -> ChunkedArray[scalar.Decimal128Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dict[str, Any]]], + type: None = None, +) -> ChunkedArray[scalar.StructScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.datetime]], + type: None = None, +) -> ChunkedArray[scalar.TimestampScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.date]], + type: None = None, +) -> ChunkedArray[scalar.Date32Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.time]], + type: None = None, +) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.timedelta]], + type: None = None, +) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[MonthDayNano]], + type: None = None, +) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[str]], + type: None = None, +) -> ChunkedArray[scalar.StringScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[bytes]], + type: None = None, +) -> ChunkedArray[scalar.BinaryScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[list[Any]]], + type: None = None, +) -> ChunkedArray[scalar.ListScalar[Any]]: ... 
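A minimal sketch of the value-based `chunked_array()` overloads above, together with the `__getitem__` overloads of `ChunkedArray`, assuming these stubs; the comments show the inference a type checker is expected to produce, not runtime types.

    import pyarrow as pa

    ints = pa.chunked_array([[2, 2, 4], [4, 5, 100]])   # ChunkedArray[Int64Scalar]
    names = pa.chunked_array([["Flamingo", "Parrot"]])  # ChunkedArray[StringScalar]

    first = ints[0]   # int index -> Int64Scalar (per the __getitem__ overloads)
    head = ints[:3]   # slice -> ChunkedArray[Int64Scalar] (Self)
    print(first.as_py(), len(head))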
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["null"] | types.NullType, +) -> ChunkedArray[scalar.NullScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["bool", "boolean"] | types.BoolType, +) -> ChunkedArray[scalar.BooleanScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i1", "int8"] | types.Int8Type, +) -> ChunkedArray[scalar.Int8Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i2", "int16"] | types.Int16Type, +) -> ChunkedArray[scalar.Int16Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i4", "int32"] | types.Int32Type, +) -> ChunkedArray[scalar.Int32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i8", "int64"] | types.Int64Type, +) -> ChunkedArray[scalar.Int64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u1", "uint8"] | types.UInt8Type, +) -> ChunkedArray[scalar.UInt8Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u2", "uint16"] | types.UInt16Type, +) -> ChunkedArray[scalar.UInt16Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u4", "uint32"] | types.Uint32Type, +) -> ChunkedArray[scalar.UInt32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u8", "uint64"] | types.UInt64Type, +) -> ChunkedArray[scalar.UInt64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, +) -> ChunkedArray[scalar.HalfFloatScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f4", "float", "float32"] | types.Float32Type, +) -> ChunkedArray[scalar.FloatScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f8", "double", "float64"] | types.Float64Type, +) -> ChunkedArray[scalar.DoubleScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["string", "str", "utf8"] | types.StringType, +) -> ChunkedArray[scalar.StringScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["binary"] | types.BinaryType, +) -> ChunkedArray[scalar.BinaryScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, +) -> ChunkedArray[scalar.LargeStringScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["large_binary"] | types.LargeBinaryType, +) -> ChunkedArray[scalar.LargeBinaryScalar]: ... 
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["binary_view"] | types.BinaryViewType, +) -> ChunkedArray[scalar.BinaryViewScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["string_view"] | types.StringViewType, +) -> ChunkedArray[scalar.StringViewScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["date32", "date32[day]"] | types.Date32Type, +) -> ChunkedArray[scalar.Date32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["date64", "date64[ms]"] | types.Date64Type, +) -> ChunkedArray[scalar.Date64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], +) -> ChunkedArray[scalar.Time32Scalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], +) -> ChunkedArray[scalar.Time32Scalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], +) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], +) -> ChunkedArray[scalar.Time64Scalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... 
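A sketch of the string-alias overloads above, assuming the stubs: the unit embedded in the alias (or in the DataType) is expected to reappear as a `Literal` type parameter on the scalar, e.g. `TimestampScalar[Literal["ms"]]`. The annotations are assumed checker output.

    import pyarrow as pa

    ts = pa.chunked_array([[0, 1_000, 2_000]], type="timestamp[ms]")
    # expected: ChunkedArray[TimestampScalar[Literal["ms"]]]
    dur = pa.chunked_array([[1, 2, 3]], type=pa.duration("s"))
    # expected: ChunkedArray[DurationScalar[Literal["s"]]]
    print(ts.type, dur.type)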
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any]] | SupportArrowStream | SupportArrowArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, +) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... +@overload +def chunked_array( + values: Iterable[Array[_ScalarT]], + type: None = None, +) -> ChunkedArray[_ScalarT]: ... +def chunked_array(value, type=None): + """ + Construct chunked array from list of array-like objects + + Parameters + ---------- + arrays : Array, list of Array, or array-like + Must all be the same data type. Can be empty only if type also passed. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_stream__`` method) can be + passed as well. + type : DataType or string coercible to DataType + + Returns + ------- + ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + """ + +_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) + +class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Return the dataframe interchange object implementing the interchange protocol. + + Parameters + ---------- + nan_as_null : bool, default False + Whether to tell the DataFrame to overwrite null values in the data + with ``NaN`` (or ``NaT``). + allow_copy : bool, default True + Whether to allow memory copying when exporting. If set to False + it would cause non-zero-copy exports to fail. + + Returns + ------- + DataFrame interchange object + The object which consuming library can use to ingress the dataframe. + + Notes + ----- + Details on the interchange protocol: + https://data-apis.org/dataframe-protocol/latest/index.html + `nan_as_null` currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. + """ + @overload + def __getitem__(self, key: int | str) -> _ColumnT: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key): + """ + Slice or return column at given index or column name + + Parameters + ---------- + key : integer, str, or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + Array (from RecordBatch) or ChunkedArray (from Table) for column input. + RecordBatch or Table for slice input. + """ + def __len__(self) -> int: ... + def column(self, i: int | str) -> _ColumnT: + """ + Select single column from Table or RecordBatch. + + Parameters + ---------- + i : int or string + The index or name of the column to retrieve. + + Returns + ------- + column : Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... 
} + ... ) + >>> table = pa.Table.from_pandas(df) + + Select a column by numeric index: + + >>> table.column(0) + + [ + [ + 2, + 4, + 5, + 100 + ] + ] + + Select a column by its name: + + >>> table.column("animals") + + [ + [ + "Flamingo", + "Horse", + "Brittle stars", + "Centipede" + ] + ] + """ + @property + def column_names(self) -> list[str]: + """ + Names of the Table or RecordBatch columns. + + Returns + ------- + list of str + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> table = pa.Table.from_arrays( + ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]], + ... names=["n_legs", "animals"], + ... ) + >>> table.column_names + ['n_legs', 'animals'] + """ + @property + def columns(self) -> list[_ColumnT]: + """ + List of all columns in numerical order. + + Returns + ------- + columns : list of Array (for RecordBatch) or list of ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.columns + [ + [ + [ + null, + 4, + 5, + null + ] + ], + [ + [ + "Flamingo", + "Horse", + null, + "Centipede" + ] + ]] + """ + def drop_null(self) -> Self: + """ + Remove rows that contain missing values from a Table or RecordBatch. + + See :func:`pyarrow.compute.drop_null` for full usage. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, with rows containing + no missing values. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [None, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", None, "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.drop_null() + pyarrow.Table + year: double + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def field(self, i: int | str) -> Field: + """ + Select a schema field by its column name or numeric index. + + Parameters + ---------- + i : int or string + The index or name of the field to retrieve. + + Returns + ------- + Field + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.field(0) + pyarrow.Field + >>> table.field(1) + pyarrow.Field + """ + @classmethod + def from_pydict( + cls, + mapping: Mapping[str, ArrayOrChunkedArray[Any] | list | np.ndarray], + schema: Schema | None = None, + metadata: Mapping | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from Arrow arrays or columns. + + Parameters + ---------- + mapping : dict or Mapping + A mapping of strings to Arrays or Python lists. + schema : Schema, default None + If not passed, will be inferred from the Mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). 
+ + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> pydict = {"n_legs": n_legs, "animals": animals} + + Construct a Table from a dictionary of arrays: + + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a dictionary of arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pydict(pydict, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pylist( + cls, + mapping: Sequence[Mapping[str, Any]], + schema: Schema | None = None, + metadata: Mapping | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from list of rows / dictionaries. + + Parameters + ---------- + mapping : list of dicts of rows + A mapping of strings to row values. + schema : Schema, default None + If not passed, will be inferred from the first row of the + mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + + Construct a Table from a list of rows: + + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4]] + animals: [["Flamingo","Dog"]] + + Construct a Table from a list of rows with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pylist(pylist, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + def itercolumns(self) -> Generator[_ColumnT, None, None]: + """ + Iterator over all columns in their numerical order. + + Yields + ------ + Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> for i in table.itercolumns(): + ... print(i.null_count) + 2 + 1 + """ + @property + def num_columns(self) -> int: ... 
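A brief sketch of the shared `_Tabular` surface as it shows up on `Table` and `RecordBatch`, assuming these stubs: construction via `from_pydict`/`from_pylist` is typed once, `column()` yields a `ChunkedArray` on a `Table` and an `Array` on a `RecordBatch`, and `drop_null()` returns `Self`. Names and expected types in the comments are illustrative assumptions.

    import pyarrow as pa

    table = pa.Table.from_pydict(
        {"n_legs": [2, 4, None, 100],
         "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]}
    )
    legs = table.column("n_legs")        # ChunkedArray on a Table
    batch = pa.RecordBatch.from_pylist([{"n_legs": 2, "animals": "Flamingo"}])
    col = batch.column("animals")        # Array on a RecordBatch
    clean = table.drop_null()            # Self -> still a Table
    print(legs.null_count, clean.num_rows, col[0].as_py())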
+ @property + def num_rows(self) -> int: ... + @property + def shape(self) -> tuple[int, int]: + """ + Dimensions of the table or record batch: (#rows, #columns). + + Returns + ------- + (int, int) + Number of rows and number of columns. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table.shape + (4, 2) + """ + @property + def schema(self) -> Schema: ... + @property + def nbytes(self) -> int: ... + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: + """ + Sort the Table or RecordBatch by one or multiple columns. + + Parameters + ---------- + sorting : str or list[tuple(name, order)] + Name of the column to use to sort (ascending), or + a list of multiple sorting conditions where + each entry is a tuple with column name + and sorting order ("ascending" or "descending") + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + Table or RecordBatch + A new tabular object sorted according to the sort keys. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.sort_by("animal") + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,2021,2021,2020,2022,2022]] + n_legs: [[5,100,4,2,4,2]] + animal: [["Brittle stars","Centipede","Dog","Flamingo","Horse","Parrot"]] + """ + def take(self, indices: Indices) -> Self: + """ + Select rows from a Table or RecordBatch. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the tabular object whose rows will be returned. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, containing the taken rows. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.take([1, 3]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def filter( + self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" + ) -> Self: + """ + Select rows from the table or record batch based on a boolean mask. + + The Table can be filtered based on a mask, which will be passed to + :func:`pyarrow.compute.filter` to perform the filtering, or it can + be filtered through a boolean :class:`.Expression` + + Parameters + ---------- + mask : Array or array-like or .Expression + The boolean mask or the :class:`.Expression` to filter the table with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled, does nothing if + an :class:`.Expression` is used. 
+ + Returns + ------- + filtered : Table or RecordBatch + A tabular object of the same schema, with only the rows selected + by applied filtering + + Examples + -------- + Using a Table (works similarly for RecordBatch): + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Define an expression and select rows: + + >>> import pyarrow.compute as pc + >>> expr = pc.field("year") <= 2020 + >>> table.filter(expr) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2019]] + n_legs: [[2,5]] + animals: [["Flamingo","Brittle stars"]] + + Define a mask and select rows: + + >>> mask = [True, True, False, None] + >>> table.filter(mask) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022]] + n_legs: [[2,4]] + animals: [["Flamingo","Horse"]] + >>> table.filter(mask, null_selection_behavior="emit_null") + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,null]] + n_legs: [[2,4,null]] + animals: [["Flamingo","Horse",null]] + """ + def to_pydict( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> dict[str, list]: + """ + Convert the Table or RecordBatch to a dict or OrderedDict. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Returns + ------- + dict + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> table.to_pydict() + {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']} + """ + def to_pylist( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> list[dict[str, Any]]: + """ + Convert the Table or RecordBatch to a list of rows / dictionaries. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. 
+ + Returns + ------- + list + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> data = [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]] + >>> table = pa.table(data, names=["n_legs", "animals"]) + >>> table.to_pylist() + [{'n_legs': 2, 'animals': 'Flamingo'}, {'n_legs': 4, 'animals': 'Horse'}, ... + """ + def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: + """ + Return human-readable string representation of Table or RecordBatch. + + Parameters + ---------- + show_metadata : bool, default False + Display Field-level and Schema-level KeyValueMetadata. + preview_cols : int, default 0 + Display values of the columns for the first N columns. + + Returns + ------- + str + """ + def remove_column(self, i: int) -> Self: ... + def drop_columns(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new Table or RecordBatch. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Raises + ------ + KeyError + If any of the passed column names do not exist. + + Returns + ------- + Table or RecordBatch + A tabular object without the column(s). + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Drop one column: + + >>> table.drop_columns("animals") + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + + Drop one or more columns: + + >>> table.drop_columns(["n_legs", "animals"]) + pyarrow.Table + ... + ---- + """ + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: ... + def append_column(self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list) -> Self: + """ + Append column at end of columns. + + Parameters + ---------- + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + Table or RecordBatch + New table or record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Append column at the end: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.append_column("year", [year]) + pyarrow.Table + n_legs: int64 + animals: string + year: int64 + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + year: [[2021,2022,2019,2021]] + """ + +class RecordBatch(_Tabular[Array]): + """ + Batch of rows of columns of equal length + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatch.from_*`` functions instead. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Constructing a RecordBatch from arrays: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Constructing a RecordBatch from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_pandas(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Constructing a RecordBatch from pylist: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + >>> pa.RecordBatch.from_pylist(pylist).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Dog + + You can also construct a RecordBatch using :func:`pyarrow.record_batch`: + + >>> pa.record_batch([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def replace_schema_metadata(self, metadata: dict | None = None) -> Self: + """ + Create shallow copy of record batch by replacing schema + key-value metadata with the indicated new metadata (which may be None, + which deletes any existing metadata + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + shallow_copy : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + + Constructing a RecordBatch with schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64())], metadata={"n_legs": "Number of legs per animal"} + ... 
) + >>> batch = pa.RecordBatch.from_arrays([n_legs], schema=my_schema) + >>> batch.schema + n_legs: int64 + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Shallow copy of a RecordBatch with deleted schema metadata: + + >>> batch.replace_schema_metadata().schema + n_legs: int64 + """ + @property + def num_columns(self) -> int: + """ + Number of columns + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_columns + 2 + """ + + @property + def num_rows(self) -> int: + """ + Number of rows + + Due to the definition of a RecordBatch, all columns have the same + number of rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_rows + 6 + """ + @property + def schema(self) -> Schema: + """ + Schema of the RecordBatch and its columns + + Returns + ------- + pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.schema + n_legs: int64 + animals: string + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the record batch. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.nbytes + 116 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the record batch + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.get_total_buffer_size() + 120 + """ + + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: + """ + Add column to RecordBatch at position i. 
+ + A new record batch is returned with the column added, the original record batch + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.add_column(0, "year", year) + pyarrow.RecordBatch + year: int64 + n_legs: int64 + animals: string + ---- + year: [2021,2022,2019,2021] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Original record batch is left unchanged: + + >>> batch + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def remove_column(self, i: int) -> Self: + """ + Create new RecordBatch with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New record batch without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.remove_column(1) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: + """ + Replace column in RecordBatch at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.set_column(1, "year", year) + pyarrow.RecordBatch + n_legs: int64 + year: int64 + ---- + n_legs: [2,4,5,100] + year: [2021,2022,2019,2021] + """ + @overload + def rename_columns(self, names: list[str]) -> Self: ... + @overload + def rename_columns(self, names: dict[str, str]) -> Self: ... + def rename_columns(self, names): + """ + Create new record batch with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. 
+ + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> new_names = ["n", "name"] + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write RecordBatch to Buffer as encapsulated IPC message, which does not + include a Schema. + + To reconstruct a RecordBatch from the encapsulated IPC message Buffer + returned by this function, a Schema must be passed separately. See + Examples. + + Parameters + ---------- + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> buf = batch.serialize() + >>> buf + + + Reconstruct RecordBatch from IPC message Buffer and original Schema + + >>> pa.ipc.read_record_batch(buf, batch.schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this RecordBatch + + Parameters + ---------- + offset : int, default 0 + Offset from start of record batch to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> batch.slice(offset=3).to_pandas() + n_legs animals + 0 4 Horse + 1 5 Brittle stars + 2 100 Centipede + >>> batch.slice(length=2).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + >>> batch.slice(offset=3, length=1).to_pandas() + n_legs animals + 0 4 Horse + """ + def equals(self, other: Self, check_metadata: bool = False) -> bool: + """ + Check if contents of two record batches are equal. + + Parameters + ---------- + other : pyarrow.RecordBatch + RecordBatch to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... 
) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch_0 = pa.record_batch([]) + >>> batch_1 = pa.RecordBatch.from_arrays( + ... [n_legs, animals], + ... names=["n_legs", "animals"], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> batch.equals(batch) + True + >>> batch.equals(batch_0) + False + >>> batch.equals(batch_1) + True + >>> batch.equals(batch_1, check_metadata=True) + False + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the RecordBatch. + + Returns a new RecordBatch with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + + Select columns my indices: + + >>> batch.select([1]) + pyarrow.RecordBatch + animals: string + ---- + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Select columns by names: + + >>> batch.select(["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,2,4,4,5,100] + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast record batch values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast batch values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> batch.cast(target_schema=my_schema) + pyarrow.RecordBatch + n_legs: duration[s] + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[Array], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping | None = None, + ) -> Self: + """ + Construct a RecordBatch from multiple pyarrow.Arrays + + Parameters + ---------- + arrays : list of pyarrow.Array + One for each field in RecordBatch + names : list of str, optional + Names for the batch fields. If not passed, schema must be passed + schema : Schema, default None + Schema for the created batch. If not passed, names must be passed + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... 
["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from pyarrow Arrays using names: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Construct a RecordBatch from pyarrow Arrays using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow RecordBatch + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the RecordBatch. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``RecordBatch``. The default of None will store the index as a + column, except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + + Returns + ------- + pyarrow.RecordBatch + + + Examples + -------- + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Convert pandas DataFrame to RecordBatch: + + >>> import pyarrow as pa + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... 
) + >>> pa.RecordBatch.from_pandas(df, schema=my_schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch specifying columns: + + >>> pa.RecordBatch.from_pandas(df, columns=["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] + ) -> Self: + """ + Construct a RecordBatch from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``RecordBatch``. + + Parameters + ---------- + struct_array : StructArray + Array to construct the record batch from. + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.RecordBatch.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array(self) -> StructArray: + """ + Convert to a struct array. + """ + def to_tensor( + self, + null_to_nan: bool = False, + row_major: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Tensor: + """ + Convert to a :class:`~pyarrow.Tensor`. + + RecordBatches that can be converted have fields of type signed or unsigned + integer or float, including all bit-widths. + + ``null_to_nan`` is ``False`` by default and this method will raise an error in case + any nulls are present. RecordBatches with nulls can be converted with ``null_to_nan`` + set to ``True``. In this case null values are converted to ``NaN`` and integer type + arrays are promoted to the appropriate float type. + + Parameters + ---------- + null_to_nan : bool, default False + Whether to write null values in the result as ``NaN``. + row_major : bool, default True + Whether resulting Tensor is row-major or column-major + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Examples + -------- + >>> import pyarrow as pa + >>> batch = pa.record_batch( + ... [ + ... pa.array([1, 2, 3, 4, None], type=pa.int32()), + ... pa.array([10, 20, 30, 40, None], type=pa.float32()), + ... ], + ... names=["a", "b"], + ... ) + + >>> batch + pyarrow.RecordBatch + a: int32 + b: float + ---- + a: [1,2,3,4,null] + b: [10,20,30,40,null] + + Convert a RecordBatch to row-major Tensor with null values + written as ``NaN``s + + >>> batch.to_tensor(null_to_nan=True) + + type: double + shape: (5, 2) + strides: (16, 8) + >>> batch.to_tensor(null_to_nan=True).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + + Convert a RecordBatch to column-major Tensor + + >>> batch.to_tensor(null_to_nan=True, row_major=False) + + type: double + shape: (5, 2) + strides: (8, 40) + >>> batch.to_tensor(null_to_nan=True, row_major=False).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + """ + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): + """ + Export to a C ArrowArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. 
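+
+        A minimal round-trip sketch, assuming :mod:`pyarrow.cffi` is used to
+        allocate the C structs (illustrative only; the capsule-based
+        ``__arrow_c_array__`` protocol is generally preferred over this
+        private API):
+
+        >>> import pyarrow as pa
+        >>> from pyarrow.cffi import ffi
+        >>> batch = pa.record_batch([pa.array([1, 2, 3])], names=["a"])
+        >>> c_schema = ffi.new("struct ArrowSchema*")
+        >>> c_array = ffi.new("struct ArrowArray*")
+        >>> batch._export_to_c(
+        ...     int(ffi.cast("uintptr_t", c_array)), int(ffi.cast("uintptr_t", c_schema))
+        ... )
+        >>> same = pa.RecordBatch._import_from_c(
+        ...     int(ffi.cast("uintptr_t", c_array)), int(ffi.cast("uintptr_t", c_schema))
+        ... )
+        >>> same.to_pydict()
+        {'a': [1, 2, 3]}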
+ + Be careful: if you don't pass the ArrowArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_array__(self, requested_schema=None): + """ + Get a pair of PyCapsules containing a C ArrowArray representation of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the batch to this schema. + If None, the batch will be returned as-is, with a schema matching the + one returned by :meth:`__arrow_c_schema__()`. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the batch as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a C ArrowSchema + and ArrowArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowDeviceArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowDeviceArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_device_array__(self, requested_schema=None, **kwargs): + """ + Get a pair of PyCapsules containing a C ArrowDeviceArray representation + of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. 
PyArrow will attempt to cast the batch to this data type. + If None, the batch will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + kwargs + Currently no additional keyword arguments are supported, but + this method will accept any keyword with a value of ``None`` + for compatibility with future keywords. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, + respectively. + """ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a + C ArrowSchema and ArrowDeviceArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowDeviceArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the arrays in the RecordBatch reside. + + Returns + ------- + DeviceAllocationType + """ + @property + def is_cpu(self) -> bool: + """ + Whether the RecordBatch's arrays are CPU-accessible. + """ + def copy_to(self, destination: MemoryManager | Device) -> Self: + """ + Copy the entire RecordBatch to destination device. + + This copies each column of the record batch to create + a new record batch where all underlying buffers for the columns have + been copied to the destination MemoryManager. + + Parameters + ---------- + destination : pyarrow.MemoryManager or pyarrow.Device + The destination device to copy the array to. + + Returns + ------- + RecordBatch + """ + +def table_to_blocks(options, table: Table, categories, extension_columns): ... + +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] + +class Table(_Tabular[ChunkedArray[Any]]): + """ + A collection of top-level named, equal length Arrow arrays. + + Warnings + -------- + Do not call this class's constructor directly, use one of the ``from_*`` + methods instead. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a RecordBatch: + + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> pa.Table.from_pandas(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a dictionary of arrays: + + >>> pydict = {"n_legs": n_legs, "animals": animals} + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"year": 2021, "animals": "Centipede"}] + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,null]] + animals: [["Flamingo","Centipede"]] + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [ + ... pa.field("year", pa.int64()), + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... ], + ... metadata={"year": "Year of entry"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + year: int64 + n_legs: int64 + animals: string + -- schema metadata -- + year: 'Year of entry' + + Construct a Table with :func:`pyarrow.table`: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + + def validate(self, *, full=False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def slice(self, offset=0, length=None) -> Self: + """ + Compute zero-copy slice of this Table. + + Parameters + ---------- + offset : int, default 0 + Offset from start of table to slice. + length : int, default None + Length of slice (default is until end of table starting from + offset). + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + >>> table.slice(length=3) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019]] + n_legs: [[2,4,5]] + animals: [["Flamingo","Horse","Brittle stars"]] + >>> table.slice(offset=2) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019,2021]] + n_legs: [[5,100]] + animals: [["Brittle stars","Centipede"]] + >>> table.slice(offset=2, length=1) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019]] + n_legs: [[5]] + animals: [["Brittle stars"]] + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the Table. + + Returns a new Table with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.select([0, 1]) + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + >>> table.select(["year"]) + pyarrow.Table + year: int64 + ---- + year: [[2020,2022,2019,2021]] + """ + def replace_schema_metadata(self, metadata: dict | None = None) -> Self: + """ + Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be None), + which deletes any existing metadata. + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Constructing a Table with pyarrow schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> table = pa.table(df, my_schema) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + + Create a shallow copy of a Table with deleted schema metadata: + + >>> table.replace_schema_metadata().schema + n_legs: int64 + animals: string + + Create a shallow copy of a Table with new schema metadata: + + >>> metadata = {"animals": "Which animal"} + >>> table.replace_schema_metadata(metadata=metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Which animal' + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Flatten this Table. + + Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> month = pa.array([4, 6]) + >>> table = pa.Table.from_arrays([struct, month], names=["a", "month"]) + >>> table + pyarrow.Table + a: struct + child 0, animals: string + child 1, n_legs: int64 + child 2, year: int64 + month: int64 + ---- + a: [ + -- is_valid: all not null + -- child 0 type: string + ["Parrot",null] + -- child 1 type: int64 + [2,4] + -- child 2 type: int64 + [null,2022]] + month: [[4,6]] + + Flatten the columns with struct field: + + >>> table.flatten() + pyarrow.Table + a.animals: string + a.n_legs: int64 + a.year: int64 + month: int64 + ---- + a.animals: [["Parrot",null]] + a.n_legs: [[2,4]] + a.year: [[null,2022]] + month: [[4,6]] + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the ChunkedArray of each column are + concatenated into zero or one chunk. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + >>> table.combine_chunks() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4,4,5,100]] + animals: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent table, but where all chunks of + each column share the same dictionary values. Dictionary indices + are transposed accordingly. + + Columns without dictionaries are returned unchanged. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> table = pa.table([c_arr], names=["animals"]) + >>> table + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog"] -- indices: + [0,1,2], -- dictionary: + ["Horse","Brittle stars","Centipede"] -- indices: + [0,1,2]] + + Unify dictionaries across both chunks: + + >>> table.unify_dictionaries() + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [0,1,2], -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [3,4,5]] + """ + def equals(self, other: Self, check_metadata: bool = False) -> Self: + """ + Check if contents of two tables are equal. + + Parameters + ---------- + other : pyarrow.Table + Table to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.Table.from_arrays([n_legs, animals], names=names) + >>> table_0 = pa.Table.from_arrays([]) + >>> table_1 = pa.Table.from_arrays( + ... [n_legs, animals], names=names, metadata={"n_legs": "Number of legs per animal"} + ... ) + >>> table.equals(table) + True + >>> table.equals(table_0) + False + >>> table.equals(table_1) + True + >>> table.equals(table_1, check_metadata=True) + False + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast table values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast table values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> table.cast(target_schema=my_schema) + pyarrow.Table + n_legs: duration[s] + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + safe: bool = True, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow Table. 
+ + The column types in the resulting Arrow Table are inferred from the + dtypes of the pandas.Series in the DataFrame. In the case of non-object + Series, the NumPy dtype is translated to its Arrow equivalent. In the + case of `object`, we need to guess the datatype by looking at the + Python objects in this Series. + + Be aware that Series of the `object` dtype don't carry enough + information to always lead to a meaningful Arrow type. In the case that + we cannot infer a type, e.g. because the DataFrame is of length 0 or + the Series only contains None/nan objects, the type is set to + null. This behavior can be avoided by constructing an explicit schema + and passing it to this function. + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the Arrow Table. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``Table``. The default of None will store the index as a column, + except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + safe : bool, default True + Check for overflows or other unsafe conversions. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.Table.from_pandas(df) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[ArrayOrChunkedArray[Any]], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping | None = None, + ) -> Self: + """ + Construct a Table from Arrow arrays. + + Parameters + ---------- + arrays : list of pyarrow.Array or pyarrow.ChunkedArray + Equal-length arrays that should form the table. + names : list of str, optional + Names for the table columns. If not passed, schema must be passed. + schema : Schema, default None + Schema for the created table. If not passed, names must be passed. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). 
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"animals": "Name of the animal species"}, + ... ) + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Name of the animal species' + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] + ) -> Self: + """ + Construct a Table from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``Table``. + + Parameters + ---------- + struct_array : StructArray or ChunkedArray + Array to construct the table from. + + Returns + ------- + pyarrow.Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.Table.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array( + self, max_chunksize: int | None = None + ) -> ChunkedArray[scalar.StructScalar]: + """ + Convert to a chunked array of struct type. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for ChunkedArray chunks. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + ChunkedArray + """ + @classmethod + def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: + """ + Construct a Table from a sequence or iterator of Arrow RecordBatches. + + Parameters + ---------- + batches : sequence or iterator of RecordBatch + Sequence of RecordBatch to be converted, all schemas must be equal. + schema : Schema, default None + If not passed, will be inferred from the first RecordBatch. 
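+
+            For example, passing an explicit schema also allows an empty
+            iterable of batches (a minimal sketch):
+
+            >>> import pyarrow as pa
+            >>> my_schema = pa.schema([("n_legs", pa.int64()), ("animals", pa.string())])
+            >>> pa.Table.from_batches([], schema=my_schema).num_rows
+            0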
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Construct a Table from a RecordBatch: + + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a sequence of RecordBatches: + + >>> pa.Table.from_batches([batch, batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: + """ + Convert Table to a list of RecordBatch objects. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + list[RecordBatch] + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatch: + + >>> table.to_batches()[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Convert a Table to a list of RecordBatches: + + >>> table.to_batches(max_chunksize=2)[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + >>> table.to_batches(max_chunksize=2)[1].to_pandas() + n_legs animals + 0 5 Brittle stars + 1 100 Centipede + """ + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: + """ + Convert the Table to a RecordBatchReader. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + RecordBatchReader + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatchReader: + + >>> table.to_reader() + + + >>> reader = table.to_reader() + >>> reader.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + >>> reader.read_all() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @property + def schema(self) -> Schema: + """ + Schema of the table and its columns. + + Returns + ------- + Schema + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... 
"animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' ... + """ + @property + def num_columns(self) -> int: + """ + Number of columns in this table. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_columns + 2 + """ + @property + def num_rows(self) -> int: + """ + Number of rows in this table. + + Due to the definition of a table, all columns have the same number of + rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_rows + 4 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the table. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.nbytes + 72 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the table. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.get_total_buffer_size() + 76 + """ + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.add_column(0, "year", [year]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2021,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Original table is left unchanged: + + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def remove_column(self, i: int) -> Self: + """ + Create new Table with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New table without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.remove_column(1) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + """ + def set_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: + """ + Replace column in Table at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.set_column(1, "year", [year]) + pyarrow.Table + n_legs: int64 + year: int64 + ---- + n_legs: [[2,4,5,100]] + year: [[2021,2022,2019,2021]] + """ + @overload + def rename_columns(self, names: list[str]) -> Self: ... + @overload + def rename_columns(self, names: dict[str, str]) -> Self: ... + def rename_columns(self, names): + """ + Create new table with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + >>> new_names = ["n", "name"] + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def drop(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new table. + + Alias of Table.drop_columns, but kept for backwards compatibility. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Returns + ------- + Table + New table without the column(s). + """ + def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: + """ + Declare a grouping over the columns of the table. + + Resulting grouping can then be used to perform aggregations + with a subsequent ``aggregate()`` method. + + Parameters + ---------- + keys : str or list[str] + Name of the columns that should be used as the grouping key. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the + default), no stable ordering of the output is guaranteed. + + Returns + ------- + TableGroupBy + + See Also + -------- + TableGroupBy.aggregate + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.group_by("year").aggregate([("n_legs", "sum")]) + pyarrow.Table + year: int64 + n_legs_sum: int64 + ---- + year: [[2020,2022,2021,2019]] + n_legs_sum: [[2,6,104,5]] + """ + def join( + self, + right_table: Self, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> Self: + """ + Perform a join between this table and another one. + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + keys : str or list[str] + The columns from current table that should be used as keys + of the join operation left side. + right_keys : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. + join_type : str, default "left outer" + The kind of join that should be performed, one of + ("left semi", "right semi", "left anti", "right anti", + "inner", "left outer", "right outer", "full outer") + left_suffix : str, default None + Which suffix to add to left column names. This prevents confusion + when the columns in left and right tables have colliding names. + right_suffix : str, default None + Which suffix to add to the right column names. This prevents confusion + when the columns in left and right tables have colliding names. + coalesce_keys : bool, default True + If the duplicated keys should be omitted from one of the sides + in the join result. 
+ use_threads : bool, default True + Whether to use multithreading or not. + + Returns + ------- + Table + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df1 = pd.DataFrame({"id": [1, 2, 3], "year": [2020, 2022, 2019]}) + >>> df2 = pd.DataFrame( + ... {"id": [3, 4], "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]} + ... ) + >>> t1 = pa.Table.from_pandas(df1) + >>> t2 = pa.Table.from_pandas(df2) + + Left outer join: + + >>> t1.join(t2, "id").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2]] + year: [[2019,2020,2022]] + n_legs: [[5,null,null]] + animal: [["Brittle stars",null,null]] + + Full outer join: + + >>> t1.join(t2, "id", join_type="full outer").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2,4]] + year: [[2019,2020,2022,null]] + n_legs: [[5,null,null,100]] + animal: [["Brittle stars",null,null,"Centipede"]] + + Right outer join: + + >>> t1.join(t2, "id", join_type="right outer").combine_chunks().sort_by("year") + pyarrow.Table + year: int64 + id: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,null]] + id: [[3,4]] + n_legs: [[5,100]] + animal: [["Brittle stars","Centipede"]] + + Right anti join + + >>> t1.join(t2, "id", join_type="right anti") + pyarrow.Table + id: int64 + n_legs: int64 + animal: string + ---- + id: [[4]] + n_legs: [[100]] + animal: [["Centipede"]] + """ + def join_asof( + self, + right_table: Self, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> Self: + """ + Perform an asof join between this table and another one. + + This is similar to a left-join except that we match on nearest key rather + than equal keys. Both tables must be sorted by the key. This type of join + is most useful for time series data that are not perfectly aligned. + + Optionally match on equivalent keys with "by" before searching with "on". + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + on : str + The column from current table that should be used as the "on" key + of the join operation left side. + + An inexact match is used on the "on" key, i.e. a row is considered a + match if and only if left_on - tolerance <= right_on <= left_on. + + The input dataset must be sorted by the "on" key. Must be a single + field of a common type. + + Currently, the "on" key must be an integer, date, or timestamp type. + by : str or list[str] + The columns from current table that should be used as the keys + of the join operation left side. The join operation is then done + only for the matches in these columns. + tolerance : int + The tolerance for inexact "on" key matching. A right row is considered + a match with the left row ``right.on - left.on <= tolerance``. The + ``tolerance`` may be: + + - negative, in which case a past-as-of-join occurs; + - or positive, in which case a future-as-of-join occurs; + - or zero, in which case an exact-as-of-join occurs. + + The tolerance is interpreted in the same units as the "on" key. + right_on : str or list[str], default None + The columns from the right_table that should be used as the on key + on the join operation right side. 
+ When ``None`` use the same key name as the left table. + right_by : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. + + Returns + ------- + Table + + Example + -------- + >>> import pyarrow as pa + >>> t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]}) + >>> t2 = pa.table( + ... { + ... "id": [3, 4], + ... "year": [2020, 2021], + ... "n_legs": [5, 100], + ... "animal": ["Brittle stars", "Centipede"], + ... } + ... ) + + >>> t1.join_asof(t2, on="year", by="id", tolerance=-2) + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[1,3,2,3,3]] + year: [[2020,2021,2022,2022,2023]] + n_legs: [[null,5,null,5,null]] + animal: [[null,"Brittle stars",null,"Brittle stars",null]] + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the table as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @property + def is_cpu(self) -> bool: + """ + Whether all ChunkedArrays are CPU-accessible. + """ + +def record_batch( + data: dict[str, list[Any] | Array[Any]] + | Collection[Array[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[Any, Any] | None = None, +) -> RecordBatch: + """ + Create a pyarrow.RecordBatch from another Python data structure or sequence + of arrays. + + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of Arrays, + a pandas DataFame, or any tabular object implementing the + Arrow PyCapsule Protocol (has an ``__arrow_c_array__`` or + ``__arrow_c_device_array__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the RecordBatch. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). 
+ + Returns + ------- + RecordBatch + + See Also + -------- + RecordBatch.from_arrays, RecordBatch.from_pandas, table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from a python dictionary: + + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Creating a RecordBatch from a list of arrays with names: + + >>> pa.record_batch([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Creating a RecordBatch from a list of arrays with names and metadata: + + >>> my_metadata = {"n_legs": "How many legs does an animal have?"} + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'How many legs does an animal have?' + + Creating a RecordBatch from a pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + >>> pa.record_batch(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Creating a RecordBatch from a pandas DataFrame with schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.record_batch(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + >>> pa.record_batch(df, my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + """ + +@overload +def table( + data: dict[str, list[Any] | Array[Any]], + schema: Schema | None = None, + metadata: Mapping[Any, Any] | None = None, + nthreads: int | None = None, +) -> Table: ... +@overload +def table( + data: Collection[ArrayOrChunkedArray[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowStream + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[Any, Any] | None = None, + nthreads: int | None = None, +) -> Table: ... 
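As a quick illustration of the two overloads above (an editorial sketch, not part of the stub file; it relies only on the public ``pa.table`` and ``pa.array`` APIs), both call shapes are expected to type-check and produce equivalent tables:

import pyarrow as pa

# First overload: a mapping of column names to arrays or Python lists.
t_from_dict = pa.table({"n_legs": [2, 4], "animals": ["Flamingo", "Horse"]})

# Second overload: a sequence of arrays plus an explicit `names` list.
t_from_arrays = pa.table(
    [pa.array([2, 4]), pa.array(["Flamingo", "Horse"])],
    names=["n_legs", "animals"],
)

# Both forms infer the same schema; a checker should see `Table` for each call.
assert t_from_dict.schema == t_from_arrays.schema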
+def table(*args, **kwargs): + """ + Create a pyarrow.Table from a Python data structure or sequence of arrays. + + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of arrays or + chunked arrays, a pandas DataFame, or any tabular object implementing + the Arrow PyCapsule Protocol (has an ``__arrow_c_array__``, + ``__arrow_c_device_array__`` or ``__arrow_c_stream__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the Arrow Table. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + If passed, the output will have exactly this schema (raising an error + when columns are not found in the data and ignoring additional data not + specified in the schema, when data is a dict or DataFrame). + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). + nthreads : int, default None + For pandas.DataFrame inputs: if greater than 1, convert columns to + Arrow in parallel using indicated number of threads. By default, + this follows :func:`pyarrow.cpu_count` (may use up to system CPU count + threads). + + Returns + ------- + Table + + See Also + -------- + Table.from_arrays, Table.from_pandas, Table.from_pydict + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from a python dictionary: + + >>> pa.table({"n_legs": n_legs, "animals": animals}) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.table([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.table(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.table(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: '{"index_columns": [], "column_indexes": [{"name": null, ... + + Construct a Table from chunked arrays: + + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... 
) + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + """ + +def concat_tables( + tables: Iterable[Table], + memory_pool: MemoryPool | None = None, + promote_options: Literal["none", "default", "permissive"] = "none", + **kwargs: Any, +) -> Table: + """ + Concatenate pyarrow.Table objects. + + If promote_options="none", a zero-copy concatenation will be performed. The schemas + of all the Tables must be the same (except the metadata), otherwise an + exception will be raised. The result Table will share the metadata with the + first table. + + If promote_options="default", any null type arrays will be casted to the type of other + arrays in the column of the same name. If a table is missing a particular + field, null values of the appropriate type will be generated to take the + place of the missing field. The new schema will share the metadata with the + first table. Each field in the new schema will share the metadata with the + first table which has the field defined. Note that type promotions may + involve additional allocations on the given ``memory_pool``. + + If promote_options="permissive", the behavior of default plus types will be promoted + to the common denominator that fits all the fields. + + Parameters + ---------- + tables : iterable of pyarrow.Table objects + Pyarrow tables to concatenate into a single Table. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + promote_options : str, default none + Accepts strings "none", "default" and "permissive". + **kwargs : dict, optional + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.table( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.table([pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"]) + >>> pa.concat_tables([t1, t2]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Parrot","Dog"]] + + """ + +class TableGroupBy: + """ + A grouping of columns in a table on which to perform aggregations. + + Parameters + ---------- + table : pyarrow.Table + Input table to execute the aggregation on. + keys : str or list[str] + Name of the grouped columns. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the default), + no stable ordering of the output is guaranteed. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table( + ... [ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], + ... names=["keys", "values"], + ... ) + + Grouping of columns: + + >>> pa.TableGroupBy(t, "keys") + + + Perform aggregations: + + >>> pa.TableGroupBy(t, "keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + """ + + keys: str | list[str] + def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... + def aggregate( + self, + aggregations: Iterable[ + tuple[ColumnSelector, Aggregation] + | tuple[ColumnSelector, Aggregation, AggregateOptions | None] + ], + ) -> Table: + """ + Perform an aggregation over the grouped columns of the table. 
+ + Parameters + ---------- + aggregations : list[tuple(str, str)] or \ +list[tuple(str, str, FunctionOptions)] + List of tuples, where each tuple is one aggregation specification + and consists of: aggregation column name followed + by function name and optionally aggregation function option. + Pass empty list to get a single row for each group. + The column name can be a string, an empty list or a list of + column names, for unary, nullary and n-ary aggregation functions + respectively. + + For the list of function names and respective aggregation + function options see :ref:`py-grouped-aggrs`. + + Returns + ------- + Table + Results of the aggregation functions. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table([ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], names=["keys", "values"]) + + Sum the column "values" over the grouped column "keys": + + >>> t.group_by("keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + + Count the rows over the grouped column "keys": + + >>> t.group_by("keys").aggregate([([], "count_all")]) + pyarrow.Table + keys: string + count_all: int64 + ---- + keys: [["a","b","c"]] + count_all: [[2,2,1]] + + Do multiple aggregations: + + >>> t.group_by("keys").aggregate([ + ... ("values", "sum"), + ... ("keys", "count") + ... ]) + pyarrow.Table + keys: string + values_sum: int64 + keys_count: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + keys_count: [[2,2,1]] + + Count the number of non-null values for column "values" + over the grouped column "keys": + + >>> import pyarrow.compute as pc + >>> t.group_by(["keys"]).aggregate([ + ... ("values", "count", pc.CountOptions(mode="only_valid")) + ... ]) + pyarrow.Table + keys: string + values_count: int64 + ---- + keys: [["a","b","c"]] + values_count: [[2,2,1]] + + Get a single row for each group in column "keys": + + >>> t.group_by("keys").aggregate([]) + pyarrow.Table + keys: string + ---- + keys: [["a","b","c"]] + """ + def _table(self) -> Table: ... + @property + def _use_threads(self) -> bool: ... + +def concat_batches( + recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None +) -> RecordBatch: + """ + Concatenate pyarrow.RecordBatch objects. + + All recordbatches must share the same Schema, + the operation implies a copy of the data to merge + the arrays of the different RecordBatches. + + Parameters + ---------- + recordbatches : iterable of pyarrow.RecordBatch objects + Pyarrow record batches to concatenate into a single RecordBatch. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.record_batch( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.record_batch( + ... [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"] + ... 
) + >>> pa.concat_batches([t1, t2]) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100,2,4] + animals: ["Flamingo","Horse","Brittle stars","Centipede","Parrot","Dog"] + + """ + +__all__ = [ + "ChunkedArray", + "chunked_array", + "_Tabular", + "RecordBatch", + "table_to_blocks", + "Table", + "record_batch", + "table", + "concat_tables", + "TableGroupBy", + "concat_batches", +] diff --git a/python/stubs/__lib_pxi/tensor.pyi b/python/stubs/__lib_pxi/tensor.pyi new file mode 100644 index 00000000000..d849abd0f1f --- /dev/null +++ b/python/stubs/__lib_pxi/tensor.pyi @@ -0,0 +1,688 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +import numpy as np + +from pyarrow.lib import _Weakrefable +from scipy.sparse import coo_matrix, csr_matrix +from sparse import COO + +class Tensor(_Weakrefable): + """ + A n-dimensional array a.k.a Tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + + type: int32 + shape: (2, 3) + strides: (12, 4) + """ + + @classmethod + def from_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Create a Tensor from a numpy array. + + Parameters + ---------- + obj : numpy.ndarray + The source numpy array + dim_names : list, optional + Names of each dimension of the Tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + + type: int32 + shape: (2, 3) + strides: (12, 4) + """ + def to_numpy(self) -> np.ndarray: + """ + Convert arrow::Tensor to numpy.ndarray with zero copy + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.to_numpy() + array([[ 2, 2, 4], + [ 4, 5, 100]], dtype=int32) + """ + def equals(self, other: Tensor) -> bool: + """ + Return true if the tensors contains exactly equal data. + + Parameters + ---------- + other : Tensor + The other tensor to compare for equality. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32) + >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a", "b"]) + >>> tensor.equals(tensor) + True + >>> tensor.equals(tensor2) + False + """ + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.dim_name(0) + 'dim1' + >>> tensor.dim_name(1) + 'dim2' + """ + @property + def dim_names(self) -> list[str]: + """ + Names of this tensor dimensions. 
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.dim_names
+        ['dim1', 'dim2']
+        """
+    @property
+    def is_mutable(self) -> bool:
+        """
+        Whether this tensor is mutable.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.is_mutable
+        True
+        """
+    @property
+    def is_contiguous(self) -> bool:
+        """
+        Whether this tensor is contiguous in memory.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.is_contiguous
+        True
+        """
+    @property
+    def ndim(self) -> int:
+        """
+        The dimension (n) of this tensor.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.ndim
+        2
+        """
+    @property
+    def size(self) -> int:
+        """
+        The size of this tensor (the total number of elements).
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.size
+        6
+        """
+    @property
+    def shape(self) -> tuple[int, ...]:
+        """
+        The shape of this tensor.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.shape
+        (2, 3)
+        """
+    @property
+    def strides(self) -> tuple[int, ...]:
+        """
+        Strides of this tensor.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
+        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"])
+        >>> tensor.strides
+        (12, 4)
+        """
+
+class SparseCOOTensor(_Weakrefable):
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self:
+        """
+        Convert numpy.ndarray to arrow::SparseCOOTensor
+
+        Parameters
+        ----------
+        obj : numpy.ndarray
+            Data used to populate the rows.
+        dim_names : list[str], optional
+            Names of the dimensions.
+
+        Returns
+        -------
+        pyarrow.SparseCOOTensor
+        """
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        coords: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self:
+        """
+        Create arrow::SparseCOOTensor from numpy.ndarrays
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            Data used to populate the rows.
+        coords : numpy.ndarray
+            Coordinates of the data.
+        shape : tuple
+            Shape of the tensor.
+        dim_names : list, optional
+            Names of the dimensions.
+        """
+    @classmethod
+    def from_scipy(cls, obj: coo_matrix, dim_names: list[str] | None = None) -> Self:
+        """
+        Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor
+
+        Parameters
+        ----------
+        obj : scipy.sparse.coo_matrix
+            The scipy matrix that should be converted.
+        dim_names : list, optional
+            Names of the dimensions.
+        """
+    @classmethod
+    def from_pydata_sparse(cls, obj: COO, dim_names: list[str] | None = None) -> Self:
+        """
+        Convert pydata/sparse.COO to arrow::SparseCOOTensor.
+ + Parameters + ---------- + obj : pydata.sparse.COO + The sparse multidimensional array that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCOOTensor. + + Parameters + ---------- + obj : Tensor + The tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy. + """ + def to_scipy(self) -> coo_matrix: + """ + Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix. + """ + def to_pydata_sparse(self) -> COO: + """ + Convert arrow::SparseCOOTensor to pydata/sparse.COO. + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCOOTensor to arrow::Tensor. + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data. + + Parameters + ---------- + other : SparseCOOTensor + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + @property + def has_canonical_format(self) -> bool: ... + +class SparseCSRMatrix(_Weakrefable): + """ + A sparse CSR matrix. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSRMatrix + + Parameters + ---------- + obj : numpy.ndarray + The dense numpy array that should be converted. + dim_names : list, optional + The names of the dimensions. + + Returns + ------- + pyarrow.SparseCSRMatrix + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSRMatrix from numpy.ndarrays. + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse matrix. + indptr : numpy.ndarray + Range of the rows, + The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + indices : numpy.ndarray + Column indices of the corresponding non-zero values. + shape : tuple + Shape of the matrix. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: + """ + Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. + + Parameters + ---------- + obj : scipy.sparse.csr_matrix + The scipy matrix that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSRMatrix. + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy. + """ + def to_scipy(self) -> csr_matrix: + """ + Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix. 
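+
+        Examples
+        --------
+        A minimal round trip through SciPy (illustrative; requires ``scipy``):
+
+        >>> import numpy as np
+        >>> import pyarrow as pa
+        >>> from scipy.sparse import csr_matrix
+        >>> m = csr_matrix(np.eye(2))
+        >>> pa.SparseCSRMatrix.from_scipy(m).to_scipy().toarray()
+        array([[1., 0.],
+               [0., 1.]])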
+ """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSRMatrix to arrow::Tensor. + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data. + + Parameters + ---------- + other : SparseCSRMatrix + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + +class SparseCSCMatrix(_Weakrefable): + """ + A sparse CSC matrix. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list[str], optional + Names of the dimensions. + + Returns + ------- + pyarrow.SparseCSCMatrix + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSCMatrix from numpy.ndarrays + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse matrix. + indptr : numpy.ndarray + Range of the rows, + The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + indices : numpy.ndarray + Column indices of the corresponding non-zero values. + shape : tuple + Shape of the matrix. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: + """ + Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : scipy.sparse.csc_matrix + The scipy matrix that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy + """ + def to_scipy(self) -> csr_matrix: + """ + Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSCMatrix to arrow::Tensor + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data + + Parameters + ---------- + other : SparseCSCMatrix + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + +class SparseCSFTensor(_Weakrefable): + """ + A sparse CSF tensor. 
+ + CSF is a generalization of compressed sparse row (CSR) index. + + CSF index recursively compresses each dimension of a tensor into a set + of prefix trees. Each path from a root to leaf forms one tensor + non-zero index. CSF is implemented with two arrays of buffers and one + arrays of integers. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSFTensor + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list[str], optional + Names of the dimensions. + + Returns + ------- + pyarrow.SparseCSFTensor + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSFTensor from numpy.ndarrays + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse tensor. + indptr : numpy.ndarray + The sparsity structure. + Each two consecutive dimensions in a tensor correspond to + a buffer in indices. + A pair of consecutive values at `indptr[dim][i]` + `indptr[dim][i + 1]` signify a range of nodes in + `indices[dim + 1]` who are children of `indices[dim][i]` node. + indices : numpy.ndarray + Stores values of nodes. + Each tensor dimension corresponds to a buffer in indptr. + shape : tuple + Shape of the matrix. + axis_order : list, optional + the sequence in which dimensions were traversed to + produce the prefix tree. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSFTensor + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSFTensor to arrow::Tensor + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data + + Parameters + ---------- + other : SparseCSFTensor + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... 
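To make the relationships among the classes stubbed above concrete, here is a small usage sketch (editorial illustration only; it uses just the public pyarrow tensor APIs annotated in this file):

import numpy as np
import pyarrow as pa

# Build a dense Arrow Tensor, then derive sparse representations from it.
dense = pa.Tensor.from_numpy(np.array([[1, 0, 2], [0, 0, 3]], dtype=np.float64))

coo = pa.SparseCOOTensor.from_tensor(dense)  # coordinate format
csr = pa.SparseCSRMatrix.from_tensor(dense)  # compressed sparse row format

# Only the non-zero entries are materialized in the sparse forms,
# and converting back yields an equal dense Tensor.
assert coo.non_zero_length == 3
assert csr.to_tensor().equals(dense)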
+ +__all__ = [ + "Tensor", + "SparseCOOTensor", + "SparseCSRMatrix", + "SparseCSCMatrix", + "SparseCSFTensor", +] diff --git a/python/stubs/__lib_pxi/types.pyi b/python/stubs/__lib_pxi/types.pyi new file mode 100644 index 00000000000..7fe6c36e332 --- /dev/null +++ b/python/stubs/__lib_pxi/types.pyi @@ -0,0 +1,4413 @@ +import datetime as dt +import sys + +from collections.abc import Mapping, Sequence +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Iterable, Iterator, Literal, overload + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + """ + Base class of all Arrow data types. + + Each data type is an *instance* of this class. + + Examples + -------- + Instance of int64 type: + + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + """ + def field(self, i: int) -> Field: + """ + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + """ + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: + """ + Bit width for fixed width type. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().bit_width + 64 + """ + @property + def byte_width(self) -> int: + """ + Byte width for fixed width type. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().byte_width + 8 + """ + @property + def num_fields(self) -> int: + """ + The number of child fields. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().num_fields + 0 + >>> pa.list_(pa.string()) + ListType(list) + >>> pa.list_(pa.string()).num_fields + 1 + >>> struct = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct.num_fields + 2 + """ + @property + def num_buffers(self) -> int: + """ + Number of data buffers required to construct Array type + excluding children. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().num_buffers + 2 + >>> pa.string().num_buffers + 3 + """ + def __hash__(self) -> int: ... + def equals(self, other: DataType | str, *, check_metadata: bool = False) -> bool: + """ + Return true if type is equivalent to passed value. + + Parameters + ---------- + other : DataType or string convertible to DataType + check_metadata : bool + Whether nested Field metadata equality should be checked as well. + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().equals(pa.string()) + False + >>> pa.int64().equals(pa.int64()) + True + """ + def to_pandas_dtype(self) -> np.generic: + """ + Return the equivalent NumPy / Pandas dtype. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().to_pandas_dtype() + + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. 
+ """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import DataType from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. + """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: + """ + Import a DataType from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class Uint32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + """ + Concrete class for timestamp data types. + + Examples + -------- + >>> import pyarrow as pa + + Create an instance of timestamp type: + + >>> pa.timestamp("us") + TimestampType(timestamp[us]) + + Create an instance of timestamp type with timezone: + + >>> pa.timestamp("s", tz="UTC") + TimestampType(timestamp[s, tz=UTC]) + """ + @property + def unit(self) -> _Unit: + """ + The timestamp unit ('s', 'ms', 'us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.timestamp("us") + >>> t.unit + 'us' + """ + @property + def tz(self) -> _Tz: + """ + The timestamp time zone, if any, or None. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.timestamp("s", tz="UTC") + >>> t.tz + 'UTC' + """ + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + """ + Concrete class for time32 data types. + + Supported time unit resolutions are 's' [second] + and 'ms' [millisecond]. + + Examples + -------- + Create an instance of time32 type: + + >>> import pyarrow as pa + >>> pa.time32("ms") + Time32Type(time32[ms]) + """ + @property + def unit(self) -> _Time32Unit: + """ + The time unit ('s' or 'ms'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.time32("ms") + >>> t.unit + 'ms' + """ + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + """ + Concrete class for time64 data types. + + Supported time unit resolutions are 'us' [microsecond] + and 'ns' [nanosecond]. 
+ + Examples + -------- + Create an instance of time64 type: + + >>> import pyarrow as pa + >>> pa.time64("us") + Time64Type(time64[us]) + """ + @property + def unit(self) -> _Time64Unit: + """ + The time unit ('us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.time64("us") + >>> t.unit + 'us' + """ + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + """ + Concrete class for duration data types. + + Examples + -------- + Create an instance of duration type: + + >>> import pyarrow as pa + >>> pa.duration("s") + DurationType(duration[s]) + """ + @property + def unit(self) -> _Unit: + """ + The duration unit ('s', 'ms', 'us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.duration("s") + >>> t.unit + 's' + """ + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + """ + Concrete class for fixed-size binary data types. + + Examples + -------- + Create an instance of fixed-size binary type: + + >>> import pyarrow as pa + >>> pa.binary(3) + FixedSizeBinaryType(fixed_size_binary[3]) + """ + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal32 data types. + + Examples + -------- + Create an instance of decimal32 type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.scale + 2 + """ + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal64 data types. + + Examples + -------- + Create an instance of decimal64 type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.scale + 2 + """ + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal128 data types. + + Examples + -------- + Create an instance of decimal128 type: + + >>> import pyarrow as pa + >>> pa.decimal128(5, 2) + Decimal128Type(decimal128(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal128(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal128(5, 2) + >>> t.scale + 2 + """ + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal256 data types. 
+ + Examples + -------- + Create an instance of decimal256 type: + + >>> import pyarrow as pa + >>> pa.decimal256(76, 38) + Decimal256Type(decimal256(76, 38)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal256(76, 38) + >>> t.precision + 76 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal256(76, 38) + >>> t.scale + 38 + """ + +class ListType(DataType, Generic[_DataTypeT]): + """ + Concrete class for list data types. + + Examples + -------- + Create an instance of ListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.string()) + ListType(list) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.string()).value_type + DataType(string) + """ + +class LargeListType(DataType, Generic[_DataTypeT]): + """ + Concrete class for large list data types + (like ListType, but with 64-bit offsets). + + Examples + -------- + Create an instance of LargeListType: + + >>> import pyarrow as pa + >>> pa.large_list(pa.string()) + LargeListType(large_list) + """ + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list(pa.string()).value_type + DataType(string) + """ + +class ListViewType(DataType, Generic[_DataTypeT]): + """ + Concrete class for list view data types. + + Examples + -------- + Create an instance of ListViewType: + + >>> import pyarrow as pa + >>> pa.list_view(pa.string()) + ListViewType(list_view) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_type + DataType(string) + """ + +class LargeListViewType(DataType, Generic[_DataTypeT]): + """ + Concrete class for large list view data types + (like ListViewType, but with 64-bit offsets). + + Examples + -------- + Create an instance of LargeListViewType: + + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()) + LargeListViewType(large_list_view) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for large list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_type + DataType(string) + """ + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + """ + Concrete class for fixed size list data types. 
+ + Examples + -------- + Create an instance of FixedSizeListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2) + FixedSizeListType(fixed_size_list[2]) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).value_type + DataType(int32) + """ + @property + def list_size(self) -> _Size: + """ + The size of the fixed size lists. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).list_size + 2 + """ + +class DictionaryMemo(_Weakrefable): + """ + Tracking container for dictionary-encoded fields. + """ + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + Uint32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + """ + Concrete class for dictionary data types. + + Examples + -------- + Create an instance of dictionary type: + + >>> import pyarrow as pa + >>> pa.dictionary(pa.int64(), pa.utf8()) + DictionaryType(dictionary) + """ + + @property + def ordered(self) -> _Ordered: + """ + Whether the dictionary is ordered, i.e. whether the ordering of values + in the dictionary is important. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int64(), pa.utf8()).ordered + False + """ + @property + def index_type(self) -> _IndexT: + """ + The data type of dictionary indices (a signed integer type). + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int16(), pa.utf8()).index_type + DataType(int16) + """ + @property + def value_type(self) -> _BasicValueT: + """ + The dictionary value type. + + The dictionary values are found in an instance of DictionaryArray. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int16(), pa.utf8()).value_type + DataType(string) + """ + +_K = TypeVar("_K", bound=DataType) + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + """ + Concrete class for map data types. + + Examples + -------- + Create an instance of MapType: + + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()) + MapType(map) + >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True) + MapType(map) + """ + + @property + def key_field(self) -> Field[_K]: + """ + The field for keys in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).key_field + pyarrow.Field + """ + @property + def key_type(self) -> _K: + """ + The data type of keys in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).key_type + DataType(string) + """ + @property + def item_field(self) -> Field[_ValueT]: + """ + The field for items in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).item_field + pyarrow.Field + """ + @property + def item_type(self) -> _ValueT: + """ + The data type of items in the map entries. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).item_type + DataType(int32) + """ + @property + def keys_sorted(self) -> _Ordered: + """ + Should the entries be sorted according to keys. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True).keys_sorted + True + """ + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + """ + Concrete class for struct data types. + + ``StructType`` supports direct indexing using ``[...]`` (implemented via + ``__getitem__``) to access its fields. + It will return the struct field with the given index or name. + + Examples + -------- + >>> import pyarrow as pa + + Accessing fields using direct indexing: + + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type[0] + pyarrow.Field + >>> struct_type["y"] + pyarrow.Field + + Accessing fields using ``field()``: + + >>> struct_type.field(1) + pyarrow.Field + >>> struct_type.field("x") + pyarrow.Field + + # Creating a schema from the struct type's fields: + >>> pa.schema(list(struct_type)) + x: int32 + y: string + """ + def get_field_index(self, name: str) -> int: + """ + Return index of the unique field with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + index : int + The index of the field with the given name; -1 if the + name isn't found or there are several fields with the given + name. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + + Index of the field with a name 'y': + + >>> struct_type.get_field_index("y") + 1 + + Index of the field that does not exist: + + >>> struct_type.get_field_index("z") + -1 + """ + def field(self, i: int | str) -> Field: + """ + Select a field by its column name or numeric index. + + Parameters + ---------- + i : int or str + + Returns + ------- + pyarrow.Field + + Examples + -------- + + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + + Select the second field: + + >>> struct_type.field(1) + pyarrow.Field + + Select the field named 'x': + + >>> struct_type.field("x") + pyarrow.Field + """ + def get_all_field_indices(self, name: str) -> list[int]: + """ + Return sorted list of indices for the fields with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + indices : List[int] + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type.get_all_field_indices("x") + [0] + """ + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + @property + def names(self) -> list[str]: + """ + Lists the field names. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type.names + ['a', 'b', 'c'] + """ + @property + def fields(self) -> list[Field]: + """ + Lists all fields within the StructType. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type.fields + [pyarrow.Field, pyarrow.Field, pyarrow.Field] + """ + +class UnionType(DataType): + """ + Base class for union data types. 
+ + Examples + -------- + Create an instance of a dense UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_DENSE, + ... ), + ... ) + (DenseUnionType(dense_union),) + + Create an instance of a dense UnionType using ``pa.dense_union``: + + >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + DenseUnionType(dense_union) + + Create an instance of a sparse UnionType using ``pa.union``: + + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_SPARSE, + ... ), + ... ) + (SparseUnionType(sparse_union),) + + Create an instance of a sparse UnionType using ``pa.sparse_union``: + + >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + SparseUnionType(sparse_union) + """ + @property + def mode(self) -> Literal["sparse", "dense"]: + """ + The mode of the union ("dense" or "sparse"). + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union.mode + 'sparse' + """ + @property + def type_codes(self) -> list[int]: + """ + The type code to indicate each data type in this union. + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union.type_codes + [0, 1] + """ + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + def field(self, i: int) -> Field: + """ + Return a child field by its numeric index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union[0] + pyarrow.Field + """ + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + +class SparseUnionType(UnionType): + """ + Concrete class for sparse union types. + + Examples + -------- + Create an instance of a sparse UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_SPARSE, + ... ), + ... ) + (SparseUnionType(sparse_union),) + + Create an instance of a sparse UnionType using ``pa.sparse_union``: + + >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + SparseUnionType(sparse_union) + """ + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + """ + Concrete class for dense union types. + + Examples + -------- + Create an instance of a dense UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_DENSE, + ... ), + ... ) + (DenseUnionType(dense_union),) + + Create an instance of a dense UnionType using ``pa.dense_union``: + + >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + DenseUnionType(dense_union) + """ + + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + """ + Concrete class for run-end encoded types. + """ + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... 
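+
+# NOTE (editor's sketch, not part of the upstream stub): RunEndEncodedType is
+# documented above without a usage example, unlike the other parametrized types
+# in this file. A minimal sketch, assuming the ``pa.run_end_encoded()`` factory
+# available in recent pyarrow releases; the output reprs follow the conventions
+# used elsewhere in this file:
+#
+#   >>> import pyarrow as pa
+#   >>> ree = pa.run_end_encoded(pa.int32(), pa.utf8())
+#   >>> ree.run_end_type
+#   DataType(int32)
+#   >>> ree.value_type
+#   DataType(string)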
+ +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + +class BaseExtensionType(DataType): + """ + Concrete base class for extension types. + """ + def __arrow_ext_class__(self) -> type[ExtensionArray]: + """ + The associated array extension class + """ + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: + """ + The associated scalar class + """ + @property + def extension_name(self) -> str: + """ + The extension type name. + """ + @property + def storage_type(self) -> DataType: + """ + The underlying storage type. + """ + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + """ + Concrete base class for Python-defined extension types. + + Parameters + ---------- + storage_type : DataType + The underlying storage type for the extension type. + extension_name : str + A unique name distinguishing this extension type. The name will be + used when deserializing IPC data. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Create an instance of RationalType extension type: + + >>> rational_type = RationalType(pa.int32()) + + Inspect the extension type: + + >>> rational_type.extension_name + 'my_package.rational' + >>> rational_type.storage_type + StructType(struct) + + Wrap an array as an extension array: + + >>> storage_array = pa.array( + ... [ + ... {"numer": 10, "denom": 17}, + ... {"numer": 20, "denom": 13}, + ... ], + ... type=rational_type.storage_type, + ... ) + >>> rational_array = rational_type.wrap_array(storage_array) + >>> rational_array + + -- is_valid: all not null + -- child 0 type: int32 + [ + 10, + 20 + ] + -- child 1 type: int32 + [ + 17, + 13 + ] + + Or do the same with creating an ExtensionArray: + + >>> rational_array = pa.ExtensionArray.from_storage(rational_type, storage_array) + >>> rational_array + + -- is_valid: all not null + -- child 0 type: int32 + [ + 10, + 20 + ] + -- child 1 type: int32 + [ + 17, + 13 + ] + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + + Note that even though we registered the concrete type + ``RationalType(pa.int64())``, PyArrow will be able to deserialize + ``RationalType(integer_type)`` for any ``integer_type``, as the deserializer + will reference the name ``my_package.rational`` and the ``@classmethod`` + ``__arrow_ext_deserialize__``. + """ + + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: + """ + Serialized representation of metadata to reconstruct the type object. 
+ + This method should return a bytes object, and those serialized bytes + are stored in the custom metadata of the Field holding an extension + type in an IPC message. + The bytes are passed to ``__arrow_ext_deserialize`` and should hold + sufficient information to reconstruct the data type instance. + """ + @classmethod + def __arrow_ext_deserialize__(cls, storage_type: DataType, serialized: bytes) -> Self: + """ + Return an extension type instance from the storage type and serialized + metadata. + + This method should return an instance of the ExtensionType subclass + that matches the passed storage type and serialized metadata (the + return value of ``__arrow_ext_serialize__``). + """ + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + """ + Concrete class for fixed shape tensor extension type. + + Examples + -------- + Create an instance of fixed shape tensor extension type: + + >>> import pyarrow as pa + >>> pa.fixed_shape_tensor(pa.int32(), [2, 2]) + FixedShapeTensorType(extension) + + Create an instance of fixed shape tensor extension type with + permutation: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type.permutation + [0, 2, 1] + """ + @property + def value_type(self) -> _ValueT: + """ + Data type of an individual tensor. + """ + @property + def shape(self) -> list[int]: + """ + Shape of the tensors. + """ + @property + def dim_names(self) -> list[str] | None: + """ + Explicit names of the dimensions. + """ + @property + def permutation(self) -> list[int] | None: + """ + Indices of the dimensions ordering. + """ + +class Bool8Type(BaseExtensionType): + """ + Concrete class for bool8 extension type. + + Bool8 is an alternate representation for boolean + arrays using 8 bits instead of 1 bit per value. The underlying + storage type is int8. + + Examples + -------- + Create an instance of bool8 extension type: + + >>> import pyarrow as pa + >>> pa.bool8() + Bool8Type(extension) + """ + +class UuidType(BaseExtensionType): + """ + Concrete class for UUID extension type. + """ + +class JsonType(BaseExtensionType): + """ + Concrete class for JSON extension type. + + Examples + -------- + Define the extension type for JSON array + + >>> import pyarrow as pa + >>> json_type = pa.json_(pa.large_utf8()) + + Create an extension array + + >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] + >>> storage = pa.array(arr, pa.large_utf8()) + >>> pa.ExtensionArray.from_storage(json_type, storage) + + [ + null, + "{ "id":30, "values":["a", "b"] }" + ] + """ + +class OpaqueType(BaseExtensionType): + """ + Concrete class for opaque extension type. + + Opaque is a placeholder for a type from an external (often non-Arrow) + system that could not be interpreted. + + Examples + -------- + Create an instance of opaque extension type: + + >>> import pyarrow as pa + >>> pa.opaque(pa.int32(), "geometry", "postgis") + OpaqueType(extension) + """ + @property + def type_name(self) -> str: + """ + The name of the type in the external system. + """ + @property + def vendor_name(self) -> str: + """ + The name of the external system. + """ + +@deprecated( + "This class is deprecated and its deserialization is disabled by default. " + ":class:`ExtensionType` is recommended instead." +) +class PyExtensionType(ExtensionType): + """ + Concrete base class for Python-defined extension types based on pickle + for (de)serialization. + + .. warning:: + This class is deprecated and its deserialization is disabled by default. 
+ :class:`ExtensionType` is recommended instead. + + Parameters + ---------- + storage_type : DataType + The storage type for which the extension is built. + """ + def __init__(self, storage_type: DataType) -> None: ... + @classmethod + def set_auto_load(cls, value: bool) -> None: + """ + Enable or disable auto-loading of serialized PyExtensionType instances. + + Parameters + ---------- + value : bool + Whether to enable auto-loading. + """ + +class UnknownExtensionType(PyExtensionType): # type: ignore + """ + A concrete class for Python-defined extension types that refer to + an unknown Python implementation. + + Parameters + ---------- + storage_type : DataType + The storage type for which the extension is built. + serialized : bytes + The serialised output. + """ + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: PyExtensionType) -> None: # type: ignore + """ + Register a Python extension type. + + Registration is based on the extension name (so different registered types + need unique extension names). Registration needs an extension type + instance, but then works for any instance of the same subclass regardless + of parametrization of the type. + + Parameters + ---------- + ext_type : BaseExtensionType instance + The ExtensionType subclass to register. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + """ + +def unregister_extension_type(type_name: str) -> None: + """ + Unregister a Python extension type. + + Parameters + ---------- + type_name : str + The name of the ExtensionType subclass to unregister. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... 
return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + """ + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + """ + KeyValueMetadata + + Parameters + ---------- + __arg0__ : dict + A dict of the key-value metadata + **kwargs : optional + additional key-value metadata + """ + def __init__(self, __arg0__: Mapping[bytes, bytes] | None = None, **kwargs) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, __key: object) -> bool: ... + def __getitem__(self, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: + """ + Convert KeyValueMetadata to dict. If a key occurs twice, the value for + the first one is returned + """ + +def ensure_metadata( + meta: Mapping[bytes | str, bytes | str] | KeyValueMetadata | None, allow_none: bool = False +) -> KeyValueMetadata | None: ... + +class Field(_Weakrefable, Generic[_DataTypeT]): + """ + A named field, with a data type, nullability, and optional metadata. + + Notes + ----- + Do not use this class's constructor directly; use pyarrow.field + + Examples + -------- + Create an instance of pyarrow.Field: + + >>> import pyarrow as pa + >>> pa.field("key", pa.int32()) + pyarrow.Field + >>> pa.field("key", pa.int32(), nullable=False) + pyarrow.Field + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field + pyarrow.Field + >>> field.metadata + {b'key': b'Something important'} + + Use the field to create a struct type: + + >>> pa.struct([field]) + StructType(struct) + """ + + def equals(self, other: Field, check_metadata: bool = False) -> bool: + """ + Test if this field is equal to the other + + Parameters + ---------- + other : pyarrow.Field + check_metadata : bool, default False + Whether Field metadata equality should be checked as well. + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("key", pa.int32()) + >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1.equals(f2) + False + >>> f1.equals(f1) + True + """ + def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: + """ + The field nullability. + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("key", pa.int32()) + >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1.nullable + True + >>> f2.nullable + False + """ + @property + def name(self) -> str: + """ + The field name. + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field.name + 'key' + """ + @property + def metadata(self) -> dict[bytes, bytes] | None: + """ + The field metadata (if any is set). + + Returns + ------- + metadata : dict or None + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field.metadata + {b'key': b'Something important'} + """ + @property + def type(self) -> _DataTypeT: ... 
+ def with_metadata(self, metadata: dict[bytes | str, bytes | str]) -> Self: + """ + Add metadata as dict of string keys and values to Field + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + + Create new field by adding metadata to existing one: + + >>> field_new = field.with_metadata({"key": "Something important"}) + >>> field_new + pyarrow.Field + >>> field_new.metadata + {b'key': b'Something important'} + """ + def remove_metadata(self) -> Self: + """ + Create new field without metadata, if any + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field.metadata + {b'key': b'Something important'} + + Create new field by removing the metadata from the existing one: + + >>> field_new = field.remove_metadata() + >>> field_new.metadata + """ + def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: + """ + A copy of this field with the replaced type + + Parameters + ---------- + new_type : pyarrow.DataType + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + + Create new field by replacing type of an existing one: + + >>> field_new = field.with_type(pa.int64()) + >>> field_new + pyarrow.Field + """ + def with_name(self, name: str) -> Self: + """ + A copy of this field with the replaced name + + Parameters + ---------- + name : str + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + + Create new field by replacing the name of an existing one: + + >>> field_new = field.with_name("lock") + >>> field_new + pyarrow.Field + """ + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: + """ + A copy of this field with the replaced nullability + + Parameters + ---------- + nullable : bool + + Returns + ------- + field: pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + >>> field.nullable + True + + Create new field by replacing the nullability of an existing one: + + >>> field_new = field.with_nullable(False) + >>> field_new + pyarrow.Field + >>> field_new.nullable + False + """ + def flatten(self) -> list[Field]: + """ + Flatten this field. If a struct field, individual child fields + will be returned with their names prefixed by the parent's name. + + Returns + ------- + fields : List[pyarrow.Field] + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("bar", pa.float64(), nullable=False) + >>> f2 = pa.field("foo", pa.int32()).with_metadata({"key": "Something important"}) + >>> ff = pa.field("ff", pa.struct([f1, f2]), nullable=False) + + Flatten a struct field: + + >>> ff + pyarrow.Field not null> + >>> ff.flatten() + [pyarrow.Field, pyarrow.Field] + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. 
+ """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import Field from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. + """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: + """ + Import a Field from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +class Schema(_Weakrefable): + """ + A named collection of types a.k.a schema. A schema defines the + column names and types in a record batch or table data structure. + They also contain metadata about the columns. For example, schemas + converted from Pandas contain metadata about their original Pandas + types so they can be converted back to the same types. + + Warnings + -------- + Do not call this class's constructor directly. Instead use + :func:`pyarrow.schema` factory function which makes a new Arrow + Schema object. + + Examples + -------- + Create a new Arrow Schema object: + + >>> import pyarrow as pa + >>> pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + some_int: int32 + some_string: string + + Create Arrow Schema with metadata: + + >>> pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + + def __len__(self) -> int: ... + def __getitem__(self, key: str) -> Field: ... + _field = __getitem__ # pyright: ignore[reportUnknownVariableType] + def __iter__(self) -> Iterator[Field]: ... + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: + """ + Return deserialized-from-JSON pandas metadata field (if it exists) + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> schema = pa.Table.from_pandas(df).schema + + Select pandas metadata field from Arrow Schema: + + >>> schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, 'stop': 4, 'step': 1}], ... + """ + @property + def names(self) -> list[str]: + """ + The schema's field names. + + Returns + ------- + list of str + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the names of the schema's fields: + + >>> schema.names + ['n_legs', 'animals'] + """ + @property + def types(self) -> list[DataType]: + """ + The schema's field types. + + Returns + ------- + list of DataType + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the types of the schema's fields: + + >>> schema.types + [DataType(int64), DataType(string)] + """ + @property + def metadata(self) -> dict[bytes, bytes]: + """ + The schema's metadata (if any is set). + + Returns + ------- + metadata: dict or None + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... 
metadata={"n_legs": "Number of legs per animal"}, + ... ) + + Get the metadata of the schema's fields: + + >>> schema.metadata + {b'n_legs': b'Number of legs per animal'} + """ + def empty_table(self) -> Table: + """ + Provide an empty table according to the schema. + + Returns + ------- + table: pyarrow.Table + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Create an empty table with schema's fields: + + >>> schema.empty_table() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[]] + animals: [[]] + """ + def equals(self, other: Schema, check_metadata: bool = False) -> bool: + """ + Test if this schema is equal to the other + + Parameters + ---------- + other : pyarrow.Schema + check_metadata : bool, default False + Key/value metadata must be equal too + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> schema1 = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema2 = pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + + Test two equal schemas: + + >>> schema1.equals(schema1) + True + + Test two unequal schemas: + + >>> schema1.equals(schema2) + False + """ + @classmethod + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | None = None) -> Schema: + """ + Returns implied schema from dataframe + + Parameters + ---------- + df : pandas.DataFrame + preserve_index : bool, default True + Whether to store the index as an additional column (or columns, for + MultiIndex) in the resulting `Table`. + The default of None will store the index as a column, except for + RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + + Returns + ------- + pyarrow.Schema + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame({"int": [1, 2], "str": ["a", "b"]}) + + Create an Arrow Schema from the schema of a pandas dataframe: + + >>> pa.Schema.from_pandas(df) + int: int64 + str: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, ... + """ + def field(self, i: int | str | bytes) -> Field: + """ + Select a field by its column name or numeric index. + + Parameters + ---------- + i : int or string + + Returns + ------- + pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Select the second field: + + >>> schema.field(1) + pyarrow.Field + + Select the field of the column named 'n_legs': + + >>> schema.field("n_legs") + pyarrow.Field + """ + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field: + """ + DEPRECATED + + Parameters + ---------- + name : str + + Returns + ------- + field: pyarrow.Field + """ + def get_field_index(self, name: str) -> int: + """ + Return index of the unique field with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + index : int + The index of the field with the given name; -1 if the + name isn't found or there are several fields with the given + name. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the index of the field named 'animals': + + >>> schema.get_field_index("animals") + 1 + + Index in case of several fields with the given name: + + >>> schema = pa.schema( + ... [ + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... pa.field("animals", pa.bool_()), + ... ], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema.get_field_index("animals") + -1 + """ + def get_all_field_indices(self, name: str) -> list[int]: + """ + Return sorted list of indices for the fields with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + indices : List[int] + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [ + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... pa.field("animals", pa.bool_()), + ... ] + ... ) + + Get the indexes of the fields named 'animals': + + >>> schema.get_all_field_indices("animals") + [1, 2] + """ + def append(self, field: Field) -> Schema: + """ + Append a field at the end of the schema. + + In contrast to Python's ``list.append()`` it does return a new + object, leaving the original Schema unmodified. + + Parameters + ---------- + field : Field + + Returns + ------- + schema: Schema + New object with appended field. + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Append a field 'extra' at the end of the schema: + + >>> schema_new = schema.append(pa.field("extra", pa.bool_())) + >>> schema_new + n_legs: int64 + animals: string + extra: bool + + Original schema is unmodified: + + >>> schema + n_legs: int64 + animals: string + """ + def insert(self, i: int, field: Field) -> Schema: + """ + Add a field at position i to the schema. + + Parameters + ---------- + i : int + field : Field + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Insert a new field on the second position: + + >>> schema.insert(1, pa.field("extra", pa.bool_())) + n_legs: int64 + extra: bool + animals: string + """ + def remove(self, i: int) -> Schema: + """ + Remove the field at index i from the schema. + + Parameters + ---------- + i : int + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Remove the second field of the schema: + + >>> schema.remove(1) + n_legs: int64 + """ + def set(self, i: int, field: Field) -> Schema: + """ + Replace a field at position i in the schema. 
+ + Parameters + ---------- + i : int + field : Field + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Replace the second field of the schema with a new field 'extra': + + >>> schema.set(1, pa.field("replaced", pa.bool_())) + n_legs: int64 + replaced: bool + """ + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: dict) -> Schema: + """ + DEPRECATED + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + """ + def with_metadata(self, metadata: dict) -> Schema: + """ + Add metadata as dict of string keys and values to Schema + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + + Returns + ------- + schema : pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Add metadata to existing schema field: + + >>> schema.with_metadata({"n_legs": "Number of legs per animal"}) + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write Schema to Buffer as encapsulated IPC message + + Parameters + ---------- + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Write schema to Buffer: + + >>> schema.serialize() + + """ + def remove_metadata(self) -> Schema: + """ + Create new schema without metadata, if any + + Returns + ------- + schema : pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Create a new schema with removing the metadata from the original: + + >>> schema.remove_metadata() + n_legs: int64 + animals: string + """ + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + ) -> str: + """ + Return human-readable representation of Schema + + Parameters + ---------- + truncate_metadata : boolean, default True + Limit metadata key/value display to a single line of ~80 characters + or less + show_field_metadata : boolean, default True + Display Field-level KeyValueMetadata + show_schema_metadata : boolean, default True + Display Schema-level KeyValueMetadata + + Returns + ------- + str : the formatted output + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: + """ + Import Schema from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. 
+ """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: + """ + Import a Schema from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +def unify_schemas( + schemas: list[Schema], *, promote_options: Literal["default", "permissive"] = "default" +) -> Schema: + """ + Unify schemas by merging fields by name. + + The resulting schema will contain the union of fields from all schemas. + Fields with the same name will be merged. Note that two fields with + different types will fail merging by default. + + - The unified field will inherit the metadata from the schema where + that field is first defined. + - The first N fields in the schema will be ordered the same as the + N fields in the first schema. + + The resulting schema will inherit its metadata from the first input + schema. + + Parameters + ---------- + schemas : list of Schema + Schemas to merge into a single one. + promote_options : str, default default + Accepts strings "default" and "permissive". + Default: null and only null can be unified with another type. + Permissive: types are promoted to the greater common denominator. + + Returns + ------- + Schema + + Raises + ------ + ArrowInvalid : + If any input schema contains fields with duplicate names. + If Fields of the same name are not mergeable. + """ + +@overload +def field(name: SupportArrowSchema) -> Field[Any]: ... +@overload +def field( + name: str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT]: ... +def field(*args, **kwargs): + """ + Create a pyarrow.Field instance. + + Parameters + ---------- + name : str or bytes + Name of the field. + Alternatively, you can also pass an object that implements the Arrow + PyCapsule Protocol for schemas (has an ``__arrow_c_schema__`` method). + type : pyarrow.DataType or str + Arrow datatype of the field or a string matching one. + nullable : bool, default True + Whether the field's values are nullable. + metadata : dict, default None + Optional field metadata, the keys and values must be coercible to + bytes. + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + Create an instance of pyarrow.Field: + + >>> import pyarrow as pa + >>> pa.field("key", pa.int32()) + pyarrow.Field + >>> pa.field("key", pa.int32(), nullable=False) + pyarrow.Field + + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field + pyarrow.Field + >>> field.metadata + {b'key': b'Something important'} + + Use the field to create a struct type: + + >>> pa.struct([field]) + StructType(struct) + + A str can also be passed for the type parameter: + + >>> pa.field("key", "int32") + pyarrow.Field + """ + +def null() -> NullType: + """ + Create instance of null type. + + Examples + -------- + Create an instance of a null type: + + >>> import pyarrow as pa + >>> pa.null() + DataType(null) + >>> print(pa.null()) + null + + Create a ``Field`` type with a null type and a name: + + >>> pa.field("null_field", pa.null()) + pyarrow.Field + """ + +def bool_() -> BoolType: + """ + Create instance of boolean type. 
+ + Examples + -------- + Create an instance of a boolean type: + + >>> import pyarrow as pa + >>> pa.bool_() + DataType(bool) + >>> print(pa.bool_()) + bool + + Create a ``Field`` type with a boolean type + and a name: + + >>> pa.field("bool_field", pa.bool_()) + pyarrow.Field + """ + +def uint8() -> UInt8Type: + """ + Create instance of unsigned int8 type. + + Examples + -------- + Create an instance of unsigned int8 type: + + >>> import pyarrow as pa + >>> pa.uint8() + DataType(uint8) + >>> print(pa.uint8()) + uint8 + + Create an array with unsigned int8 type: + + >>> pa.array([0, 1, 2], type=pa.uint8()) + + [ + 0, + 1, + 2 + ] + """ + +def int8() -> Int8Type: + """ + Create instance of signed int8 type. + + Examples + -------- + Create an instance of int8 type: + + >>> import pyarrow as pa + >>> pa.int8() + DataType(int8) + >>> print(pa.int8()) + int8 + + Create an array with int8 type: + + >>> pa.array([0, 1, 2], type=pa.int8()) + + [ + 0, + 1, + 2 + ] + """ + +def uint16() -> UInt16Type: + """ + Create instance of unsigned uint16 type. + + Examples + -------- + Create an instance of unsigned int16 type: + + >>> import pyarrow as pa + >>> pa.uint16() + DataType(uint16) + >>> print(pa.uint16()) + uint16 + + Create an array with unsigned int16 type: + + >>> pa.array([0, 1, 2], type=pa.uint16()) + + [ + 0, + 1, + 2 + ] + """ + +def int16() -> Int16Type: + """ + Create instance of signed int16 type. + + Examples + -------- + Create an instance of int16 type: + + >>> import pyarrow as pa + >>> pa.int16() + DataType(int16) + >>> print(pa.int16()) + int16 + + Create an array with int16 type: + + >>> pa.array([0, 1, 2], type=pa.int16()) + + [ + 0, + 1, + 2 + ] + """ + +def uint32() -> Uint32Type: + """ + Create instance of unsigned uint32 type. + + Examples + -------- + Create an instance of unsigned int32 type: + + >>> import pyarrow as pa + >>> pa.uint32() + DataType(uint32) + >>> print(pa.uint32()) + uint32 + + Create an array with unsigned int32 type: + + >>> pa.array([0, 1, 2], type=pa.uint32()) + + [ + 0, + 1, + 2 + ] + """ + +def int32() -> Int32Type: + """ + Create instance of signed int32 type. + + Examples + -------- + Create an instance of int32 type: + + >>> import pyarrow as pa + >>> pa.int32() + DataType(int32) + >>> print(pa.int32()) + int32 + + Create an array with int32 type: + + >>> pa.array([0, 1, 2], type=pa.int32()) + + [ + 0, + 1, + 2 + ] + """ + +def int64() -> Int64Type: + """ + Create instance of signed int64 type. + + Examples + -------- + Create an instance of int64 type: + + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> print(pa.int64()) + int64 + + Create an array with int64 type: + + >>> pa.array([0, 1, 2], type=pa.int64()) + + [ + 0, + 1, + 2 + ] + """ + +def uint64() -> UInt64Type: + """ + Create instance of unsigned uint64 type. 
+ + Examples + -------- + Create an instance of unsigned int64 type: + + >>> import pyarrow as pa + >>> pa.uint64() + DataType(uint64) + >>> print(pa.uint64()) + uint64 + + Create an array with unsigned uint64 type: + + >>> pa.array([0, 1, 2], type=pa.uint64()) + + [ + 0, + 1, + 2 + ] + """ + +def tzinfo_to_string(tz: dt.tzinfo) -> str: + """ + Converts a time zone object into a string indicating the name of a time + zone, one of: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + tz : datetime.tzinfo + Time zone object + + Returns + ------- + name : str + Time zone name + """ + +def string_to_tzinfo(name: str) -> dt.tzinfo: + """ + Convert a time zone name into a time zone object. + + Supported input strings are: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + name: str + Time zone name. + + Returns + ------- + tz : datetime.tzinfo + Time zone object + """ + +@overload +def timestamp(unit: _Unit) -> TimestampType[_Unit, _Tz]: ... +@overload +def timestamp(unit: _Unit, tz: _Tz) -> TimestampType[_Unit, _Tz]: ... +def timestamp(*args, **kwargs): + """ + Create instance of timestamp type with resolution and optional time zone. + + Parameters + ---------- + unit : str + one of 's' [second], 'ms' [millisecond], 'us' [microsecond], or 'ns' + [nanosecond] + tz : str, default None + Time zone name. None indicates time zone naive + + Examples + -------- + Create an instance of timestamp type: + + >>> import pyarrow as pa + >>> pa.timestamp("us") + TimestampType(timestamp[us]) + >>> pa.timestamp("s", tz="America/New_York") + TimestampType(timestamp[s, tz=America/New_York]) + >>> pa.timestamp("s", tz="+07:30") + TimestampType(timestamp[s, tz=+07:30]) + + Use timestamp type when creating a scalar object: + + >>> from datetime import datetime + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("s", tz="UTC")) + + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("us")) + + + Returns + ------- + timestamp_type : TimestampType + """ + +def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: + """ + Create instance of 32-bit time (time of day) type with unit resolution. + + Parameters + ---------- + unit : str + one of 's' [second], or 'ms' [millisecond] + + Returns + ------- + type : pyarrow.Time32Type + + Examples + -------- + >>> import pyarrow as pa + >>> pa.time32("s") + Time32Type(time32[s]) + >>> pa.time32("ms") + Time32Type(time32[ms]) + """ + +def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: + """ + Create instance of 64-bit time (time of day) type with unit resolution. + + Parameters + ---------- + unit : str + One of 'us' [microsecond], or 'ns' [nanosecond]. + + Returns + ------- + type : pyarrow.Time64Type + + Examples + -------- + >>> import pyarrow as pa + >>> pa.time64("us") + Time64Type(time64[us]) + >>> pa.time64("ns") + Time64Type(time64[ns]) + """ + +def duration(unit: _Unit) -> DurationType[_Unit]: + """ + Create instance of a duration type with unit resolution. + + Parameters + ---------- + unit : str + One of 's' [second], 'ms' [millisecond], 'us' [microsecond], or + 'ns' [nanosecond]. 
+ + Returns + ------- + type : pyarrow.DurationType + + Examples + -------- + Create an instance of duration type: + + >>> import pyarrow as pa + >>> pa.duration("us") + DurationType(duration[us]) + >>> pa.duration("s") + DurationType(duration[s]) + + Create an array with duration type: + + >>> pa.array([0, 1, 2], type=pa.duration("s")) + + [ + 0, + 1, + 2 + ] + """ + +def month_day_nano_interval() -> MonthDayNanoIntervalType: + """ + Create instance of an interval type representing months, days and + nanoseconds between two dates. + + Examples + -------- + Create an instance of an month_day_nano_interval type: + + >>> import pyarrow as pa + >>> pa.month_day_nano_interval() + DataType(month_day_nano_interval) + + Create a scalar with month_day_nano_interval type: + + >>> pa.scalar((1, 15, -30), type=pa.month_day_nano_interval()) + + """ + +def date32() -> Date32Type: + """ + Create instance of 32-bit date (days since UNIX epoch 1970-01-01). + + Examples + -------- + Create an instance of 32-bit date type: + + >>> import pyarrow as pa + >>> pa.date32() + DataType(date32[day]) + + Create a scalar with 32-bit date type: + + >>> from datetime import date + >>> pa.scalar(date(2012, 1, 1), type=pa.date32()) + + """ + +def date64() -> Date64Type: + """ + Create instance of 64-bit date (milliseconds since UNIX epoch 1970-01-01). + + Examples + -------- + Create an instance of 64-bit date type: + + >>> import pyarrow as pa + >>> pa.date64() + DataType(date64[ms]) + + Create a scalar with 64-bit date type: + + >>> from datetime import datetime + >>> pa.scalar(datetime(2012, 1, 1), type=pa.date64()) + + """ + +def float16() -> Float16Type: + """ + Create half-precision floating point type. + + Examples + -------- + Create an instance of float16 type: + + >>> import pyarrow as pa + >>> pa.float16() + DataType(halffloat) + >>> print(pa.float16()) + halffloat + + Create an array with float16 type: + + >>> arr = np.array([1.5, np.nan], dtype=np.float16) + >>> a = pa.array(arr, type=pa.float16()) + >>> a + + [ + 15872, + 32256 + ] + + Note that unlike other float types, if you convert this array + to a python list, the types of its elements will be ``np.float16`` + + >>> [type(val) for val in a.to_pylist()] + [, ] + """ + +def float32() -> Float32Type: + """ + Create single-precision floating point type. + + Examples + -------- + Create an instance of float32 type: + + >>> import pyarrow as pa + >>> pa.float32() + DataType(float) + >>> print(pa.float32()) + float + + Create an array with float32 type: + + >>> pa.array([0.0, 1.0, 2.0], type=pa.float32()) + + [ + 0, + 1, + 2 + ] + """ + +def float64() -> Float64Type: + """ + Create double-precision floating point type. + + Examples + -------- + Create an instance of float64 type: + + >>> import pyarrow as pa + >>> pa.float64() + DataType(double) + >>> print(pa.float64()) + double + + Create an array with float64 type: + + >>> pa.array([0.0, 1.0, 2.0], type=pa.float64()) + + [ + 0, + 1, + 2 + ] + """ + +@overload +def decimal32(precision: _Precision) -> Decimal32Type[_Precision, Literal[0]]: ... +@overload +def decimal32(precision: _Precision, scale: _Scale) -> Decimal32Type[_Precision, _Scale]: ... +def decimal32(*args, **kwargs): + """ + Create decimal type with precision and scale and 32-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. 
The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal32(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 32-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal32(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 32-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 9 significant digits, consider + using ``decimal64``, ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 9 + scale : int + + Returns + ------- + decimal_type : Decimal32Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal32(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal64(precision: _Precision) -> Decimal64Type[_Precision, Literal[0]]: ... +@overload +def decimal64(precision: _Precision, scale: _Scale) -> Decimal64Type[_Precision, _Scale]: ... +def decimal64(*args, **kwargs): + """ + Create decimal type with precision and scale and 64-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal64(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 64-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal64(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 64-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 18 significant digits, consider + using ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 18 + scale : int + + Returns + ------- + decimal_type : Decimal64Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal64(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal128(precision: _Precision) -> Decimal128Type[_Precision, Literal[0]]: ... +@overload +def decimal128(precision: _Precision, scale: _Scale) -> Decimal128Type[_Precision, _Scale]: ... +def decimal128(*args, **kwargs): + """ + Create decimal type with precision and scale and 128-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal128(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 128-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. 
+ + ``decimal128(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 128-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 38 significant digits, consider + using ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 38 + scale : int + + Returns + ------- + decimal_type : Decimal128Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal128(5, 2) + Decimal128Type(decimal128(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal128(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal256(precision: _Precision) -> Decimal256Type[_Precision, Literal[0]]: ... +@overload +def decimal256(precision: _Precision, scale: _Scale) -> Decimal256Type[_Precision, _Scale]: ... +def decimal256(*args, **kwargs): + """ + Create decimal type with precision and scale and 256-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + For most use cases, the maximum precision offered by ``decimal128`` + is sufficient, and it will result in a more compact and more efficient + encoding. ``decimal256`` is useful if you need a precision higher + than 38 significant digits. + + Parameters + ---------- + precision : int + Must be between 1 and 76 + scale : int + + Returns + ------- + decimal_type : Decimal256Type + """ + +def string() -> StringType: + """ + Create UTF8 variable-length string type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.string() + DataType(string) + + and use the string type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.string()) + + [ + "foo", + "bar", + "baz" + ] + """ + +utf8 = string +""" +Alias for string(). + +Examples +-------- +Create an instance of a string type: + +>>> import pyarrow as pa +>>> pa.utf8() +DataType(string) + +and use the string type to create an array: + +>>> pa.array(['foo', 'bar', 'baz'], type=pa.utf8()) + +[ + "foo", + "bar", + "baz" +] +""" + +@overload +def binary(length: Literal[-1] = ...) -> BinaryType: ... +@overload +def binary(length: int) -> FixedSizeBinaryType: ... +def binary(length): + """ + Create variable-length or fixed size binary type. + + Parameters + ---------- + length : int, optional, default -1 + If length == -1 then return a variable length binary type. If length is + greater than or equal to 0 then return a fixed size binary type of + width `length`. + + Examples + -------- + Create an instance of a variable-length binary type: + + >>> import pyarrow as pa + >>> pa.binary() + DataType(binary) + + and use the variable-length binary type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.binary()) + + [ + 666F6F, + 626172, + 62617A + ] + + Create an instance of a fixed-size binary type: + + >>> pa.binary(3) + FixedSizeBinaryType(fixed_size_binary[3]) + + and use the fixed-length binary type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.binary(3)) + + [ + 666F6F, + 626172, + 62617A + ] + """ + +def large_binary() -> LargeBinaryType: + """ + Create large variable-length binary type. + + This data type may not be supported by all Arrow implementations. 
Unless + you need to represent data larger than 2GB, you should prefer binary(). + + Examples + -------- + Create an instance of large variable-length binary type: + + >>> import pyarrow as pa + >>> pa.large_binary() + DataType(large_binary) + + and use the type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.large_binary()) + + [ + 666F6F, + 626172, + 62617A + ] + """ + +def large_string() -> LargeStringType: + """ + Create large UTF8 variable-length string type. + + This data type may not be supported by all Arrow implementations. Unless + you need to represent data larger than 2GB, you should prefer string(). + + Examples + -------- + Create an instance of large UTF8 variable-length binary type: + + >>> import pyarrow as pa + >>> pa.large_string() + DataType(large_string) + + and use the type to create an array: + + >>> pa.array(["foo", "bar"] * 50, type=pa.large_string()) + + [ + "foo", + "bar", + ... + "foo", + "bar" + ] + """ + +large_utf8 = large_string +""" +Alias for large_string(). + +Examples +-------- +Create an instance of large UTF8 variable-length binary type: + +>>> import pyarrow as pa +>>> pa.large_utf8() +DataType(large_string) + +and use the type to create an array: + +>>> pa.array(['foo', 'bar'] * 50, type=pa.large_utf8()) + +[ + "foo", + "bar", + ... + "foo", + "bar" +] +""" + +def binary_view() -> BinaryViewType: + """ + Create a variable-length binary view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.binary_view() + DataType(binary_view) + """ + +def string_view() -> StringViewType: + """ + Create UTF8 variable-length string view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.string_view() + DataType(string_view) + """ + +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] = ... +) -> ListType[_DataTypeT]: ... +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size +) -> FixedSizeListType[_DataTypeT, _Size]: ... +def list_(*args, **kwargs): + """ + Create ListType instance from child data type or field. + + Parameters + ---------- + value_type : DataType or Field + list_size : int, optional, default -1 + If length == -1 then return a variable length list type. If length is + greater than or equal to 0 then return a fixed size list type. + + Returns + ------- + list_type : DataType + + Examples + -------- + Create an instance of ListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.string()) + ListType(list) + >>> pa.list_(pa.int32(), 2) + FixedSizeListType(fixed_size_list[2]) + + Use the ListType to create a scalar: + + >>> pa.scalar(["foo", None], type=pa.list_(pa.string(), 2)) + + + or an array: + + >>> pa.array([[1, 2], [3, 4]], pa.list_(pa.int32(), 2)) + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + """ + +def large_list(value_type: _DataTypeT | Field[_DataTypeT]) -> LargeListType[_DataTypeT]: + """ + Create LargeListType instance from child data type or field. + + This data type may not be supported by all Arrow implementations. + Unless you need to represent data larger than 2**31 elements, you should + prefer list_(). 
+ + Parameters + ---------- + value_type : DataType or Field + + Returns + ------- + list_type : DataType + + Examples + -------- + Create an instance of LargeListType: + + >>> import pyarrow as pa + >>> pa.large_list(pa.int8()) + LargeListType(large_list) + + Use the LargeListType to create an array: + + >>> pa.array([[-1, 3]] * 5, type=pa.large_list(pa.int8())) + + [ + [ + -1, + 3 + ], + [ + -1, + 3 + ], + ... + """ + +def list_view(value_type: _DataTypeT | Field[_DataTypeT]) -> ListViewType[_DataTypeT]: + """ + Create ListViewType instance from child data type or field. + + This data type may not be supported by all Arrow implementations + because it is an alternative to the ListType. + + Parameters + ---------- + value_type : DataType or Field + + Returns + ------- + list_view_type : DataType + + Examples + -------- + Create an instance of ListViewType: + + >>> import pyarrow as pa + >>> pa.list_view(pa.string()) + ListViewType(list_view) + """ + +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListViewType[_DataTypeT]: + """ + Create LargeListViewType instance from child data type or field. + + This data type may not be supported by all Arrow implementations + because it is an alternative to the ListType. + + Parameters + ---------- + value_type : DataType or Field + + Returns + ------- + list_view_type : DataType + + Examples + -------- + Create an instance of LargeListViewType: + + >>> import pyarrow as pa + >>> pa.large_list_view(pa.int8()) + LargeListViewType(large_list_view) + """ + +@overload +def map_(key_type: _K, item_type: _ValueT) -> MapType[_K, _ValueT, _Ordered]: ... +@overload +def map_( + key_type: _K, item_type: _ValueT, key_sorted: _Ordered +) -> MapType[_K, _ValueT, _Ordered]: ... +def map_(*args, **kwargs): + """ + Create MapType instance from key and item data types or fields. + + Parameters + ---------- + key_type : DataType or Field + item_type : DataType or Field + keys_sorted : bool + + Returns + ------- + map_type : DataType + + Examples + -------- + Create an instance of MapType: + + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()) + MapType(map) + >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True) + MapType(map) + + Use MapType to create an array: + + >>> data = [[{"key": "a", "value": 1}, {"key": "b", "value": 2}], [{"key": "c", "value": 3}]] + >>> pa.array(data, type=pa.map_(pa.string(), pa.int32(), keys_sorted=True)) + + [ + keys: + [ + "a", + "b" + ] + values: + [ + 1, + 2 + ], + keys: + [ + "c" + ] + values: + [ + 3 + ] + ] + """ + +@overload +def dictionary( + index_type: _IndexT, value_type: _BasicValueT +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... +@overload +def dictionary( + index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... +def dictionary(*args, **kwargs): + """ + Dictionary (categorical, or simply encoded) type. + + Parameters + ---------- + index_type : DataType + value_type : DataType + ordered : bool + + Returns + ------- + type : DictionaryType + + Examples + -------- + Create an instance of dictionary type: + + >>> import pyarrow as pa + >>> pa.dictionary(pa.int64(), pa.utf8()) + DictionaryType(dictionary) + + Use dictionary type to create an array: + + >>> pa.array(["a", "b", None, "d"], pa.dictionary(pa.int64(), pa.utf8())) + + ... 
+ -- dictionary: + [ + "a", + "b", + "d" + ] + -- indices: + [ + 0, + 1, + null, + 2 + ] + """ + +def struct( + fields: Iterable[Field[Any] | tuple[str, Field[Any]] | tuple[str, DataType]] + | Mapping[str, Field[Any]], +) -> StructType: + """ + Create StructType instance from fields. + + A struct is a nested type parameterized by an ordered sequence of types + (which can all be distinct), called its fields. + + Parameters + ---------- + fields : iterable of Fields or tuples, or mapping of strings to DataTypes + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + + Examples + -------- + Create an instance of StructType from an iterable of tuples: + + >>> import pyarrow as pa + >>> fields = [ + ... ("f1", pa.int32()), + ... ("f2", pa.string()), + ... ] + >>> struct_type = pa.struct(fields) + >>> struct_type + StructType(struct) + + Retrieve a field from a StructType: + + >>> struct_type[0] + pyarrow.Field + >>> struct_type["f1"] + pyarrow.Field + + Create an instance of StructType from an iterable of Fields: + + >>> fields = [ + ... pa.field("f1", pa.int32()), + ... pa.field("f2", pa.string(), nullable=False), + ... ] + >>> pa.struct(fields) + StructType(struct) + + Returns + ------- + type : DataType + """ + +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: + """ + Create SparseUnionType from child fields. + + A sparse union is a nested type where each logical value is taken from + a single child. A buffer of 8-bit type ids indicates which child + a given logical value is to be taken from. + + In a sparse union, each child array should have the same length as the + union array, regardless of the actual number of union values that + refer to it. + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + type_codes : list of integers, default None + + Returns + ------- + type : SparseUnionType + """ + +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: + """ + Create DenseUnionType from child fields. + + A dense union is a nested type where each logical value is taken from + a single child, at a specific offset. A buffer of 8-bit type ids + indicates which child a given logical value is to be taken from, + and a buffer of 32-bit offsets indicates at which physical position + in the given child array the logical value is to be taken from. + + Unlike a sparse union, a dense union allows encoding only the child array + values which are actually referred to by the union array. This is + counterbalanced by the additional footprint of the offsets buffer, and + the additional indirection cost when looking up values. + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + type_codes : list of integers, default None + + Returns + ------- + type : DenseUnionType + """ + +@overload +def union( + child_fields: list[Field[Any]], mode: Literal["sparse"], type_codes: list[int] | None = None +) -> SparseUnionType: ... +@overload +def union( + child_fields: list[Field[Any]], mode: Literal["dense"], type_codes: list[int] | None = None +) -> DenseUnionType: ... +def union(*args, **kwargs): + """ + Create UnionType from child fields. + + A union is a nested type where each logical value is taken from a + single child. 
A buffer of 8-bit type ids indicates which child + a given logical value is to be taken from. + + Unions come in two flavors: sparse and dense + (see also `pyarrow.sparse_union` and `pyarrow.dense_union`). + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + mode : str + Must be 'sparse' or 'dense' + type_codes : list of integers, default None + + Returns + ------- + type : UnionType + """ + +def run_end_encoded( + run_end_type: _RunEndType, value_type: _BasicValueT +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: + """ + Create RunEndEncodedType from run-end and value types. + + Parameters + ---------- + run_end_type : pyarrow.DataType + The integer type of the run_ends array. Must be 'int16', 'int32', or 'int64'. + value_type : pyarrow.DataType + The type of the values array. + + Returns + ------- + type : RunEndEncodedType + """ + +def json_(storage_type: DataType = ...) -> JsonType: + """ + Create instance of JSON extension type. + + Parameters + ---------- + storage_type : DataType, default pyarrow.string() + The underlying data type. Can be on of the following types: + string, large_string, string_view. + + Returns + ------- + type : JsonType + + Examples + -------- + Create an instance of JSON extension type: + + >>> import pyarrow as pa + >>> pa.json_(pa.utf8()) + JsonType(extension) + + Use the JSON type to create an array: + + >>> pa.array(['{"a": 1}', '{"b": 2}'], type=pa.json_(pa.utf8())) + + [ + "{"a": 1}", + "{"b": 2}" + ] + """ + +def uuid() -> UuidType: + """ + Create UuidType instance. + + Returns + ------- + type : UuidType + """ + +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: + """ + Create instance of fixed shape tensor extension type with shape and optional + names of tensor dimensions and indices of the desired logical + ordering of dimensions. + + Parameters + ---------- + value_type : DataType + Data type of individual tensor elements. + shape : tuple or list of integers + The physical shape of the contained tensors. + dim_names : tuple or list of strings, default None + Explicit names to tensor dimensions. + permutation : tuple or list integers, default None + Indices of the desired ordering of the original dimensions. + The indices contain a permutation of the values ``[0, 1, .., N-1]`` where + N is the number of dimensions. The permutation indicates which dimension + of the logical layout corresponds to which dimension of the physical tensor. + For more information on this parameter see + :ref:`fixed_shape_tensor_extension`. 
+ + Examples + -------- + Create an instance of fixed shape tensor extension type: + + >>> import pyarrow as pa + >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) + >>> tensor_type + FixedShapeTensorType(extension) + + Inspect the data type: + + >>> tensor_type.value_type + DataType(int32) + >>> tensor_type.shape + [2, 2] + + Create a table with fixed shape tensor extension array: + + >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] + >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) + >>> tensor = pa.ExtensionArray.from_storage(tensor_type, storage) + >>> pa.table([tensor], names=["tensor_array"]) + pyarrow.Table + tensor_array: extension + ---- + tensor_array: [[[1,2,3,4],[10,20,30,40],[100,200,300,400]]] + + Create an instance of fixed shape tensor extension type with names + of tensor dimensions: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), dim_names=["C", "H", "W"]) + >>> tensor_type.dim_names + ['C', 'H', 'W'] + + Create an instance of fixed shape tensor extension type with + permutation: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type.permutation + [0, 2, 1] + + Returns + ------- + type : FixedShapeTensorType + """ + +def bool8() -> Bool8Type: + """ + Create instance of bool8 extension type. + + Examples + -------- + Create an instance of bool8 extension type: + + >>> import pyarrow as pa + >>> type = pa.bool8() + >>> type + Bool8Type(extension) + + Inspect the data type: + + >>> type.storage_type + DataType(int8) + + Create a table with a bool8 array: + + >>> arr = [-1, 0, 1, 2, None] + >>> storage = pa.array(arr, pa.int8()) + >>> other = pa.ExtensionArray.from_storage(type, storage) + >>> pa.table([other], names=["unknown_col"]) + pyarrow.Table + unknown_col: extension + ---- + unknown_col: [[-1,0,1,2,null]] + + Returns + ------- + type : Bool8Type + """ + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: + """ + Create instance of opaque extension type. + + Parameters + ---------- + storage_type : DataType + The underlying data type. + type_name : str + The name of the type in the external system. + vendor_name : str + The name of the external system. + + Examples + -------- + Create an instance of an opaque extension type: + + >>> import pyarrow as pa + >>> type = pa.opaque(pa.binary(), "other", "jdbc") + >>> type + OpaqueType(extension) + + Inspect the data type: + + >>> type.storage_type + DataType(binary) + >>> type.type_name + 'other' + >>> type.vendor_name + 'jdbc' + + Create a table with an opaque array: + + >>> arr = [None, b"foobar"] + >>> storage = pa.array(arr, pa.binary()) + >>> other = pa.ExtensionArray.from_storage(type, storage) + >>> pa.table([other], names=["unknown_col"]) + pyarrow.Table + unknown_col: extension + ---- + unknown_col: [[null,666F6F626172]] + + Returns + ------- + type : OpaqueType + """ + +@overload +def type_for_alias(name: Literal["null"]) -> NullType: ... +@overload +def type_for_alias(name: Literal["bool", "boolean"]) -> BoolType: ... +@overload +def type_for_alias(name: Literal["i1", "int8"]) -> Int8Type: ... +@overload +def type_for_alias(name: Literal["i2", "int16"]) -> Int16Type: ... +@overload +def type_for_alias(name: Literal["i4", "int32"]) -> Int32Type: ... +@overload +def type_for_alias(name: Literal["i8", "int64"]) -> Int64Type: ... +@overload +def type_for_alias(name: Literal["u1", "uint8"]) -> UInt8Type: ... +@overload +def type_for_alias(name: Literal["u2", "uint16"]) -> UInt16Type: ... 
+@overload +def type_for_alias(name: Literal["u4", "uint32"]) -> Uint32Type: ... +@overload +def type_for_alias(name: Literal["u8", "uint64"]) -> UInt64Type: ... +@overload +def type_for_alias(name: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... +@overload +def type_for_alias(name: Literal["f4", "float", "float32"]) -> Float32Type: ... +@overload +def type_for_alias(name: Literal["f8", "double", "float64"]) -> Float64Type: ... +@overload +def type_for_alias(name: Literal["string", "str", "utf8"]) -> StringType: ... +@overload +def type_for_alias(name: Literal["binary"]) -> BinaryType: ... +@overload +def type_for_alias( + name: Literal["large_string", "large_str", "large_utf8"], +) -> LargeStringType: ... +@overload +def type_for_alias(name: Literal["large_binary"]) -> LargeBinaryType: ... +@overload +def type_for_alias(name: Literal["binary_view"]) -> BinaryViewType: ... +@overload +def type_for_alias(name: Literal["string_view"]) -> StringViewType: ... +@overload +def type_for_alias(name: Literal["date32", "date32[day]"]) -> Date32Type: ... +@overload +def type_for_alias(name: Literal["date64", "date64[ms]"]) -> Date64Type: ... +@overload +def type_for_alias(name: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... +@overload +def type_for_alias(name: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... +@overload +def type_for_alias(name: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... +@overload +def type_for_alias(name: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... +@overload +def type_for_alias(name: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... +@overload +def type_for_alias(name: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... +@overload +def type_for_alias(name: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... +@overload +def type_for_alias(name: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... +@overload +def type_for_alias(name: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... +@overload +def type_for_alias(name: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... +def type_for_alias(name): + """ + Return DataType given a string alias if one exists. + + Parameters + ---------- + name : str + The alias of the DataType that should be retrieved. + + Returns + ------- + type : DataType + """ + +@overload +def ensure_type(ty: None, allow_none: Literal[True]) -> None: ... +@overload +def ensure_type(ty: _DataTypeT) -> _DataTypeT: ... +@overload +def ensure_type(ty: Literal["null"]) -> NullType: ... +@overload +def ensure_type(ty: Literal["bool", "boolean"]) -> BoolType: ... +@overload +def ensure_type(ty: Literal["i1", "int8"]) -> Int8Type: ... +@overload +def ensure_type(ty: Literal["i2", "int16"]) -> Int16Type: ... +@overload +def ensure_type(ty: Literal["i4", "int32"]) -> Int32Type: ... +@overload +def ensure_type(ty: Literal["i8", "int64"]) -> Int64Type: ... +@overload +def ensure_type(ty: Literal["u1", "uint8"]) -> UInt8Type: ... +@overload +def ensure_type(ty: Literal["u2", "uint16"]) -> UInt16Type: ... +@overload +def ensure_type(ty: Literal["u4", "uint32"]) -> Uint32Type: ... 
+@overload +def ensure_type(ty: Literal["u8", "uint64"]) -> UInt64Type: ... +@overload +def ensure_type(ty: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... +@overload +def ensure_type(ty: Literal["f4", "float", "float32"]) -> Float32Type: ... +@overload +def ensure_type(ty: Literal["f8", "double", "float64"]) -> Float64Type: ... +@overload +def ensure_type(ty: Literal["string", "str", "utf8"]) -> StringType: ... +@overload +def ensure_type(ty: Literal["binary"]) -> BinaryType: ... +@overload +def ensure_type( + ty: Literal["large_string", "large_str", "large_utf8"], +) -> LargeStringType: ... +@overload +def ensure_type(ty: Literal["large_binary"]) -> LargeBinaryType: ... +@overload +def ensure_type(ty: Literal["binary_view"]) -> BinaryViewType: ... +@overload +def ensure_type(ty: Literal["string_view"]) -> StringViewType: ... +@overload +def ensure_type(ty: Literal["date32", "date32[day]"]) -> Date32Type: ... +@overload +def ensure_type(ty: Literal["date64", "date64[ms]"]) -> Date64Type: ... +@overload +def ensure_type(ty: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... +@overload +def ensure_type(ty: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... +@overload +def ensure_type(ty: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... +@overload +def ensure_type(ty: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... +@overload +def ensure_type(ty: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... +@overload +def ensure_type(ty: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... +@overload +def ensure_type(ty: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... +@overload +def ensure_type(ty: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... +@overload +def ensure_type(ty: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... +@overload +def ensure_type(ty: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... +def schema( + fields: Iterable[Field[Any]] | Iterable[tuple[str, DataType]] | Mapping[str, DataType], + metadata: dict[bytes | str, bytes | str] | None = None, +) -> Schema: + """ + Construct pyarrow.Schema from collection of fields. + + Parameters + ---------- + fields : iterable of Fields or tuples, or mapping of strings to DataTypes + Can also pass an object that implements the Arrow PyCapsule Protocol + for schemas (has an ``__arrow_c_schema__`` method). + metadata : dict, default None + Keys and values must be coercible to bytes. + + Examples + -------- + Create a Schema from iterable of tuples: + + >>> import pyarrow as pa + >>> pa.schema( + ... [ + ... ("some_int", pa.int32()), + ... ("some_string", pa.string()), + ... pa.field("some_required_string", pa.string(), nullable=False), + ... ] + ... ) + some_int: int32 + some_string: string + some_required_string: string not null + + Create a Schema from iterable of Fields: + + >>> pa.schema([pa.field("some_int", pa.int32()), pa.field("some_string", pa.string())]) + some_int: int32 + some_string: string + + DataTypes can also be passed as strings. 
The following is equivalent to the + above example: + + >>> pa.schema([pa.field("some_int", "int32"), pa.field("some_string", "string")]) + some_int: int32 + some_string: string + + Or more concisely: + + >>> pa.schema([("some_int", "int32"), ("some_string", "string")]) + some_int: int32 + some_string: string + + Returns + ------- + schema : pyarrow.Schema + """ + +def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: + """ + Convert NumPy dtype to pyarrow.DataType. + + Parameters + ---------- + dtype : the numpy dtype to convert + + + Examples + -------- + Create a pyarrow DataType from NumPy dtype: + + >>> import pyarrow as pa + >>> import numpy as np + >>> pa.from_numpy_dtype(np.dtype("float16")) + DataType(halffloat) + >>> pa.from_numpy_dtype("U") + DataType(string) + >>> pa.from_numpy_dtype(bool) + DataType(bool) + >>> pa.from_numpy_dtype(np.str_) + DataType(string) + """ + +def is_boolean_value(obj: Any) -> bool: + """ + Check if the object is a boolean. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_integer_value(obj: Any) -> bool: + """ + Check if the object is an integer. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_float_value(obj: Any) -> bool: + """ + Check if the object is a float. + + Parameters + ---------- + obj : object + The object to check + """ + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "Uint32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "PyExtensionType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "ensure_metadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "tzinfo_to_string", + "string_to_tzinfo", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "ensure_type", + "schema", + "from_numpy_dtype", + "is_boolean_value", + "is_integer_value", + "is_float_value", +] diff --git a/python/stubs/_azurefs.pyi b/python/stubs/_azurefs.pyi new file mode 100644 index 00000000000..317943ce20f --- /dev/null 
+++ b/python/stubs/_azurefs.pyi
@@ -0,0 +1,74 @@
+from typing import Literal
+
+from ._fs import FileSystem
+
+class AzureFileSystem(FileSystem):
+    """
+    Azure Blob Storage backed FileSystem implementation
+
+    This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a.
+    Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific
+    features will be used when they provide a performance advantage. Azurite emulator is
+    also supported. Note: `/` is the only supported delimiter.
+
+    The storage account is considered the root of the filesystem. When enabled, containers
+    will be created or deleted during relevant directory operations. Obviously, this also
+    requires authentication with the additional permissions.
+
+    By default `DefaultAzureCredential `__
+    is used for authentication. This means it will try several types of authentication
+    and go with the first one that works. If any authentication parameters are provided when
+    initialising the FileSystem, they will be used instead of the default credential.
+
+    Parameters
+    ----------
+    account_name : str
+        Azure Blob Storage account name. This is the globally unique identifier for the
+        storage account.
+    account_key : str, default None
+        Account key of the storage account. If sas_token and account_key are None the
+        default credential will be used. The parameters account_key and sas_token are
+        mutually exclusive.
+    blob_storage_authority : str, default None
+        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
+        for connecting to a local emulator, like Azurite.
+    dfs_storage_authority : str, default None
+        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
+        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
+    blob_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    dfs_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    sas_token : str, default None
+        SAS token for the storage account, used as an alternative to account_key. If sas_token
+        and account_key are None the default credential will be used. The parameters
+        account_key and sas_token are mutually exclusive.
+
+    Examples
+    --------
+    >>> from pyarrow import fs
+    >>> azure_fs = fs.AzureFileSystem(account_name="myaccount")
+    >>> azurite_fs = fs.AzureFileSystem(
+    ...     account_name="devstoreaccount1",
+    ...     account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
+    ...     blob_storage_authority="127.0.0.1:10000",
+    ...     dfs_storage_authority="127.0.0.1:10000",
+    ...     blob_storage_scheme="http",
+    ...     dfs_storage_scheme="http",
+    ... )
+
+    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+    """
+
+    def __init__(
+        self,
+        account_name: str,
+        account_key: str | None = None,
+        blob_storage_authority: str | None = None,
+        dfs_storage_authority: str | None = None,
+        blob_storage_scheme: Literal["http", "https"] = "https",
+        dfs_storage_scheme: Literal["http", "https"] = "https",
+        sas_token: str | None = None,
+    ) -> None: ...
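As a quick illustration of what these annotations buy downstream code, a minimal sketch (the helper name `list_container` is invented for this example; `FileSelector` and `get_file_info` are inherited from the `FileSystem` base class, so only the constructor above needs Azure-specific typing):

    from pyarrow import fs

    def list_container(azure: fs.AzureFileSystem, container: str) -> list[fs.FileInfo]:
        # get_file_info(FileSelector) comes from the FileSystem base class;
        # the stub above only has to annotate the Azure-specific constructor.
        return azure.get_file_info(fs.FileSelector(container, recursive=True))

With the stubs installed, a type checker can flag a misspelled keyword such as `account_keys=` at the `AzureFileSystem(...)` call site instead of leaving it to fail at runtime.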
diff --git a/python/stubs/_compute.pyi b/python/stubs/_compute.pyi new file mode 100644 index 00000000000..3d61ae42787 --- /dev/null +++ b/python/stubs/_compute.pyi @@ -0,0 +1,1721 @@ +from typing import ( + Any, + Callable, + Iterable, + Literal, + Sequence, + TypeAlias, + TypedDict, + overload, +) + +from . import lib + +_Order: TypeAlias = Literal["ascending", "descending"] +_Placement: TypeAlias = Literal["at_start", "at_end"] + +class Kernel(lib._Weakrefable): + """ + A kernel object. + + Kernels handle the execution of a Function for a certain signature. + """ + +class Function(lib._Weakrefable): + """ + A compute function. + + A function implements a certain logical computation over a range of + possible input signatures. Each signature accepts a range of input + types and is implemented by a given Kernel. + + Functions can be of different kinds: + + * "scalar" functions apply an item-wise computation over all items + of their inputs. Each item in the output only depends on the values + of the inputs at the same position. Examples: addition, comparisons, + string predicates... + + * "vector" functions apply a collection-wise computation, such that + each item in the output may depend on the values of several items + in each input. Examples: dictionary encoding, sorting, extracting + unique values... + + * "scalar_aggregate" functions reduce the dimensionality of the inputs by + applying a reduction function. Examples: sum, min_max, mode... + + * "hash_aggregate" functions apply a reduction function to an input + subdivided by grouping criteria. They may not be directly called. + Examples: hash_sum, hash_min_max... + + * "meta" functions dispatch to other functions. + """ + @property + def arity(self) -> int: + """ + The function arity. + + If Ellipsis (i.e. `...`) is returned, the function takes a variable + number of arguments. + """ + @property + def kind( + self, + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: + """ + The function kind. + """ + @property + def name(self) -> str: + """ + The function name. + """ + @property + def num_kernels(self) -> int: + """ + The number of kernels implementing this function. + """ + def call( + self, + args: Iterable, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, + ) -> Any: + """ + Call the function on the given arguments. + + Parameters + ---------- + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. + options : FunctionOptions, optional + Options instance for executing this function. This should have + the right concrete options type. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If + not passed, will be inferred from passed data. + """ + +class FunctionOptions(lib._Weakrefable): + def serialize(self) -> lib.Buffer: ... + @classmethod + def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + +class FunctionRegistry(lib._Weakrefable): + def get_function(self, name: str) -> Function: + """ + Look up a function by name in the registry. + + Parameters + ---------- + name : str + The name of the function to lookup + """ + + def list_functions(self) -> list[str]: + """ + Return all function names in the registry. + """ + +class HashAggregateFunction(Function): ... +class HashAggregateKernel(Kernel): ... 
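A short, hedged sketch of how these pieces fit together at runtime, using only public `pyarrow.compute` names (`get_function`, `CountOptions`) together with the `Function.call` signature declared above:

    import pyarrow as pa
    import pyarrow.compute as pc

    fn = pc.get_function("count")      # Function looked up in the default registry
    print(fn.name, fn.kind)            # "count", a scalar_aggregate function

    arr = pa.array([1, None, 2])
    # Call through the Function object with a concrete FunctionOptions instance;
    # the higher-level pc.count() wrapper builds the same options under the hood.
    print(fn.call([arr], options=pc.CountOptions(mode="only_null")))   # -> 1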
+class ScalarAggregateFunction(Function): ... +class ScalarAggregateKernel(Kernel): ... +class ScalarFunction(Function): ... +class ScalarKernel(Kernel): ... +class VectorFunction(Function): ... +class VectorKernel(Kernel): ... + +# ==================== _compute.pyx Option classes ==================== +class ArraySortOptions(FunctionOptions): + """ + Options for the `array_sort_indices` function. + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + null_placement : str, default "at_end" + Where nulls in the input should be sorted. + Accepted values are "at_start", "at_end". + """ + def __init__( + self, + order: _Order = "ascending", + null_placement: _Placement = "at_end", + ) -> None: ... + +class AssumeTimezoneOptions(FunctionOptions): + """ + Options for the `assume_timezone` function. + + Parameters + ---------- + timezone : str + Timezone to assume for the input. + ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + """ + + def __init__( + self, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + ) -> None: ... + +class CastOptions(FunctionOptions): + """ + Options for the `cast` function. + + Parameters + ---------- + target_type : DataType, optional + The PyArrow type to cast to. + allow_int_overflow : bool, default False + Whether integer overflow is allowed when casting. + allow_time_truncate : bool, default False + Whether time precision truncation is allowed when casting. + allow_time_overflow : bool, default False + Whether date/time range overflow is allowed when casting. + allow_decimal_truncate : bool, default False + Whether decimal precision truncation is allowed when casting. + allow_float_truncate : bool, default False + Whether floating-point precision truncation is allowed when casting. + allow_invalid_utf8 : bool, default False + Whether producing invalid utf8 data is allowed when casting. + """ + + allow_int_overflow: bool + allow_time_truncate: bool + allow_time_overflow: bool + allow_decimal_truncate: bool + allow_float_truncate: bool + allow_invalid_utf8: bool + + def __init__( + self, + target_type: lib.DataType | None = None, + *, + allow_int_overflow: bool | None = None, + allow_time_truncate: bool | None = None, + allow_time_overflow: bool | None = None, + allow_decimal_truncate: bool | None = None, + allow_float_truncate: bool | None = None, + allow_invalid_utf8: bool | None = None, + ) -> None: ... + @staticmethod + def safe(target_type: lib.DataType | None = None) -> CastOptions: ... + @staticmethod + def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... + def is_safe(self) -> bool: ... + +class CountOptions(FunctionOptions): + """ + Options for the `count` function. + + Parameters + ---------- + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + """ + def __init__(self, mode: Literal["only_valid", "only_null", "all"] = "only_valid") -> None: ... + +class CumulativeOptions(FunctionOptions): + """ + Options for `cumulative_*` functions. 
+ + - cumulative_sum + - cumulative_sum_checked + - cumulative_prod + - cumulative_prod_checked + - cumulative_max + - cumulative_min + + Parameters + ---------- + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class CumulativeSumOptions(FunctionOptions): + """ + Options for `cumulative_sum` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for sum computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class DayOfWeekOptions(FunctionOptions): + """ + Options for the `day_of_week` function. + + Parameters + ---------- + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + """ + + def __init__(self, *, count_from_zero: bool = True, week_start: int = 1) -> None: ... + +class DictionaryEncodeOptions(FunctionOptions): + """ + Options for dictionary encoding. + + Parameters + ---------- + null_encoding : str, default "mask" + How to encode nulls in the input. + Accepted values are "mask" (null inputs emit a null in the indices + array), "encode" (null inputs emit a non-null index pointing to + a null value in the dictionary array). + """ + def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... + +class RunEndEncodeOptions(FunctionOptions): + """ + Options for run-end encoding. + + Parameters + ---------- + run_end_type : DataType, default pyarrow.int32() + The data type of the run_ends array. + + Accepted values are pyarrow.{int16(), int32(), int64()}. + """ + # TODO: default is DataType(int32) + def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + +class ElementWiseAggregateOptions(FunctionOptions): + """ + Options for element-wise aggregate functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + """ + def __init__(self, *, skip_nulls: bool = True) -> None: ... + +class ExtractRegexOptions(FunctionOptions): + """ + Options for the `extract_regex` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class ExtractRegexSpanOptions(FunctionOptions): + """ + Options for the `extract_regex_span` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class FilterOptions(FunctionOptions): + """ + Options for selecting with a boolean filter. + + Parameters + ---------- + null_selection_behavior : str, default "drop" + How to handle nulls in the selection filter. + Accepted values are "drop", "emit_null". + """ + + def __init__(self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... + +class IndexOptions(FunctionOptions): + """ + Options for the `index` function. 
+ + Parameters + ---------- + value : Scalar + The value to search for. + """ + def __init__(self, value: lib.Scalar) -> None: ... + +class JoinOptions(FunctionOptions): + """ + Options for the `binary_join_element_wise` function. + + Parameters + ---------- + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + """ + @overload + def __init__(self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + @overload + def __init__(self, null_handling: Literal["replace"], null_replacement: str = "") -> None: ... + +class ListSliceOptions(FunctionOptions): + """ + Options for list array slicing. + + Parameters + ---------- + start : int + Index to start slicing inner list elements (inclusive). + stop : Optional[int], default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. (NotImplemented) + step : int, default 1 + Slice step. + return_fixed_size_list : Optional[bool], default None + Whether to return a FixedSizeListArray. If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + """ + def __init__( + self, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + ) -> None: ... + +class ListFlattenOptions(FunctionOptions): + """ + Options for `list_flatten` function + + Parameters + ---------- + recursive : bool, default False + When True, the list array is flattened recursively until an array + of non-list values is formed. + """ + def __init__(self, recursive: bool = False) -> None: ... + +class MakeStructOptions(FunctionOptions): + """ + Options for the `make_struct` function. + + Parameters + ---------- + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + """ + def __init__( + self, + field_names: Sequence[str] = (), + *, + field_nullability: Sequence[bool] | None = None, + field_metadata: Sequence[lib.KeyValueMetadata] | None = None, + ) -> None: ... + +class MapLookupOptions(FunctionOptions): + """ + Options for the `map_lookup` function. + + Parameters + ---------- + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + """ + # TODO: query_key: Scalar or Object can be converted to Scalar + def __init__( + self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] + ) -> None: ... + +class MatchSubstringOptions(FunctionOptions): + """ + Options for looking for a substring. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + """ + + def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + +class ModeOptions(FunctionOptions): + """ + Options for the `mode` function. + + Parameters + ---------- + n : int, default 1 + Number of distinct most-common values to return. 
+ skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, n: int = 1, *, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class NullOptions(FunctionOptions): + """ + Options for the `is_null` function. + + Parameters + ---------- + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + """ + def __init__(self, *, nan_is_null: bool = False) -> None: ... + +class PadOptions(FunctionOptions): + """ + Options for padding strings. + + Parameters + ---------- + width : int + Desired string length. + padding : str, default " " + What to pad the string with. Should be one byte or codepoint. + lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). + """ + def __init__( + self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True + ) -> None: ... + +class PairwiseOptions(FunctionOptions): + """ + Options for `pairwise` functions. + + Parameters + ---------- + period : int, default 1 + Period for applying the period function. + """ + def __init__(self, period: int = 1) -> None: ... + +class PartitionNthOptions(FunctionOptions): + """ + Options for the `partition_nth_indices` function. + + Parameters + ---------- + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + """ + def __init__(self, pivot: int, *, null_placement: _Placement = "at_end") -> None: ... + +class WinsorizeOptions(FunctionOptions): + """ + Options for the `winsorize` function. + + Parameters + ---------- + lower_limit : float, between 0 and 1 + The quantile below which all values are replaced with the quantile's value. + upper_limit : float, between 0 and 1 + The quantile above which all values are replaced with the quantile's value. + """ + def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + +class QuantileOptions(FunctionOptions): + """ + Options for the `quantile` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. 
+ """ + def __init__( + self, + q: float | Sequence[float], + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class RandomOptions(FunctionOptions): + """ + Options for random generation. + + Parameters + ---------- + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + """ + def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + +class RankOptions(FunctionOptions): + """ + Options for the `rank` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + tiebreaker : str, default "first" + Configure how ties between equal values are handled. + Accepted values are: + + - "min": Ties get the smallest possible rank in sorted order. + - "max": Ties get the largest possible rank in sorted order. + - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + """ + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + ) -> None: ... + +class RankQuantileOptions(FunctionOptions): + """ + Options for the `rank_quantile` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + """ + + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + ) -> None: ... + +class PivotWiderOptions(FunctionOptions): + """ + Options for the `pivot_wider` function. + + Parameters + ---------- + key_names : sequence of str + The pivot key names expected in the pivot key column. + For each entry in `key_names`, a column with the same name is emitted + in the struct output. + unexpected_key_behavior : str, default "ignore" + The behavior when pivot keys not in `key_names` are encountered. + Accepted values are "ignore", "raise". + If "ignore", unexpected keys are silently ignored. + If "raise", unexpected keys raise a KeyError. 
+ """ + def __init__( + self, + key_names: Sequence[str], + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + ) -> None: ... + +class ReplaceSliceOptions(FunctionOptions): + """ + Options for replacing slices. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + """ + def __init__(self, start: int, stop: int, replacement: str) -> None: ... + +class ReplaceSubstringOptions(FunctionOptions): + """ + Options for replacing matched substrings. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + """ + def __init__( + self, pattern: str, replacement: str, *, max_replacements: int | None = None + ) -> None: ... + +_RoundMode: TypeAlias = Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", +] + +class RoundBinaryOptions(FunctionOptions): + """ + Options for rounding numbers when ndigits is provided by a second array + + Parameters + ---------- + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +class RoundOptions(FunctionOptions): + """ + Options for rounding numbers. + + Parameters + ---------- + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + ndigits: int = 0, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +_DateTimeUint: TypeAlias = Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", +] + +class RoundTemporalOptions(FunctionOptions): + """ + Options for rounding temporal values. + + Parameters + ---------- + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. 
+ + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. + """ + def __init__( + self, + multiple: int = 1, + unit: _DateTimeUint = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + ) -> None: ... + +class RoundToMultipleOptions(FunctionOptions): + """ + Options for rounding numbers to a multiple. + + Parameters + ---------- + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__(self, multiple: float = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... + +class ScalarAggregateOptions(FunctionOptions): + """ + Options for scalar aggregations. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... + +class SelectKOptions(FunctionOptions): + """ + Options for top/bottom k-selection. + + Parameters + ---------- + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + """ + + def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + +class SetLookupOptions(FunctionOptions): + """ + Options for the `is_in` and `index_in` functions. + + Parameters + ---------- + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + """ + def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... + +class SliceOptions(FunctionOptions): + """ + Options for slicing. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). 
+ stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + """ + + def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ... + +class SortOptions(FunctionOptions): + """ + Options for the `sort_indices` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". + """ + def __init__( + self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end" + ) -> None: ... + +class SplitOptions(FunctionOptions): + """ + Options for splitting on whitespace. + + Parameters + ---------- + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + + def __init__(self, *, max_splits: int | None = None, reverse: bool = False) -> None: ... + +class SplitPatternOptions(FunctionOptions): + """ + Options for splitting on a string pattern. + + Parameters + ---------- + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + def __init__( + self, pattern: str, *, max_splits: int | None = None, reverse: bool = False + ) -> None: ... + +class StrftimeOptions(FunctionOptions): + """ + Options for the `strftime` function. + + Parameters + ---------- + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + """ + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C") -> None: ... + +class StrptimeOptions(FunctionOptions): + """ + Options for the `strptime` function. + + Parameters + ---------- + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + """ + def __init__( + self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False + ) -> None: ... + +class StructFieldOptions(FunctionOptions): + """ + Options for the `struct_field` function. + + Parameters + ---------- + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + """ + def __init__( + self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int + ) -> None: ... 
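+
+# Illustrative usage (a minimal sketch; the example arrays and keys below are
+# arbitrary, not taken from the stubs): these FunctionOptions subclasses are
+# passed to compute kernels through the ``options`` keyword, for example:
+#
+# >>> import pyarrow as pa
+# >>> import pyarrow.compute as pc
+# >>> arr = pa.array([3, 1, None, 2])
+# >>> pc.is_in(arr, options=pc.SetLookupOptions(value_set=pa.array([1, 2])))
+# >>> table = pa.table({"k": ["b", "a"], "v": [2, 1]})
+# >>> pc.sort_indices(table, options=pc.SortOptions(sort_keys=[("k", "ascending")]))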
+ +class TakeOptions(FunctionOptions): + """ + Options for the `take` and `array_take` functions. + + Parameters + ---------- + boundscheck : boolean, default True + Whether to check indices are within bounds. If False and an + index is out of bounds, behavior is undefined (the process + may crash). + """ + def __init__(self, boundscheck: bool = True) -> None: ... + +class TDigestOptions(FunctionOptions): + """ + Options for the `tdigest` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class TrimOptions(FunctionOptions): + """ + Options for trimming characters from strings. + + Parameters + ---------- + characters : str + Individual characters to be trimmed from the string. + """ + def __init__(self, characters: str) -> None: ... + +class Utf8NormalizeOptions(FunctionOptions): + """ + Options for the `utf8_normalize` function. + + Parameters + ---------- + form : str + Unicode normalization form. + Accepted values are "NFC", "NFKC", "NFD", NFKD". + """ + + def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + +class VarianceOptions(FunctionOptions): + """ + Options for the `variance` and `stddev` functions. + + Parameters + ---------- + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class SkewOptions(FunctionOptions): + """ + Options for the `skew` and `kurtosis` functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + biased : bool, default True + Whether the calculated value is biased. + If False, the value computed includes a correction factor to reduce bias. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 + ) -> None: ... + +class WeekOptions(FunctionOptions): + """ + Options for the `week` function. + + Parameters + ---------- + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). 
+ first_week_is_fully_in_year : bool, default False + If True, week number 0 is fully in January. + If False, a week that begins on December 29, 30 or 31 is considered + to be week number 0 of the following year. + """ + def __init__( + self, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + ) -> None: ... + +# ==================== _compute.pyx Functions ==================== + +def call_function( + name: str, + args: list, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, +) -> Any: + """ + Call a named function. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to call. + args : list + The arguments to the function. + options : optional + options provided to the function. + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If not + passed, inferred from data. + """ + +def function_registry() -> FunctionRegistry: ... +def get_function(name: str) -> Function: + """ + Get a function by name. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to lookup + """ + +def list_functions() -> list[str]: + """ + Return all function names in the global registry. + """ + +# ==================== _compute.pyx Udf ==================== + +def call_tabular_function( + function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None +) -> lib.RecordBatchReader: + """ + Get a record batch iterator from a tabular function. + + Parameters + ---------- + function_name : str + Name of the function. + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. Currently, only an empty args is supported. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + """ + +class _FunctionDoc(TypedDict): + summary: str + description: str + +def register_scalar_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined scalar function. + + This API is EXPERIMENTAL. + + A scalar function is a function that executes elementwise + operations on arrays or scalars, i.e. a scalar function must + be computed row-by-row with no state where each output row + is computed only from its corresponding input row. + In other words, all argument arrays have the same length, + and the output array is of the same length as the arguments. + Scalar functions are the only functions allowed in query engine + expressions. + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return an Array or Scalar + matching the out_type. It must return a Scalar if + all arguments are scalar, else it must return an Array. + + To define a varargs function, pass a callable that takes + *args. The last in_type will be the type of all varargs + arguments. 
+ function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "simple udf" + >>> func_doc["description"] = "add a constant to a scalar" + >>> + >>> def add_constant(ctx, array): + ... return pc.add(array, 1, memory_pool=ctx.memory_pool) + >>> + >>> func_name = "py_add_func" + >>> in_types = {"array": pa.int64()} + >>> out_type = pa.int64() + >>> pc.register_scalar_function(add_constant, func_name, func_doc, in_types, out_type) + >>> + >>> func = pc.get_function(func_name) + >>> func.name + 'py_add_func' + >>> answer = pc.call_function(func_name, [pa.array([20])]) + >>> answer + + [ + 21 + ] + """ + +def register_tabular_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined tabular function. + + This API is EXPERIMENTAL. + + A tabular function is one accepting a context argument of type + UdfContext and returning a generator of struct arrays. + The in_types argument must be empty and the out_type argument + specifies a schema. Each struct array must have field types + corresponding to the schema. + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The only argument is the context argument of type + UdfContext. It must return a callable that + returns on each invocation a StructArray matching + the out_type, where an empty array indicates end. + function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + Must be an empty dictionary (reserved for future use). + out_type : Union[Schema, DataType] + Schema of the function's output, or a corresponding flat struct type. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + """ + +def register_aggregate_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined non-decomposable aggregate function. + + This API is EXPERIMENTAL. + + A non-decomposable aggregation function is a function that executes + aggregate operations on the whole data that it is aggregating. + In other words, non-decomposable aggregate function cannot be + split into consume/merge/finalize steps. + + This is often used with ordered or segmented aggregation where groups + can be emit before accumulating all of the input data. 
+ + Note that currently the size of any input column cannot exceed 2 GB + for a single segment (all groups combined). + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return a Scalar matching the + out_type. + To define a varargs function, pass a callable that takes + *args. The in_type needs to match in type of inputs when + the function gets called. + function_name : str + Name of the function. This name must be unique, i.e., + there should only be one function registered with + this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + + Examples + -------- + >>> import numpy as np + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "simple median udf" + >>> func_doc["description"] = "compute median" + >>> + >>> def compute_median(ctx, array): + ... return pa.scalar(np.median(array)) + >>> + >>> func_name = "py_compute_median" + >>> in_types = {"array": pa.int64()} + >>> out_type = pa.float64() + >>> pc.register_aggregate_function(compute_median, func_name, func_doc, in_types, out_type) + >>> + >>> func = pc.get_function(func_name) + >>> func.name + 'py_compute_median' + >>> answer = pc.call_function(func_name, [pa.array([20, 40])]) + >>> answer + + >>> table = pa.table([pa.array([1, 1, 2, 2]), pa.array([10, 20, 30, 40])], names=["k", "v"]) + >>> result = table.group_by("k").aggregate([("v", "py_compute_median")]) + >>> result + pyarrow.Table + k: int64 + v_py_compute_median: double + ---- + k: [[1,2]] + v_py_compute_median: [[15,35]] + """ + +def register_vector_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined vector function. + + This API is EXPERIMENTAL. + + A vector function is a function that executes vector + operations on arrays. Vector function is often used + when compute doesn't fit other more specific types of + functions (e.g., scalar and aggregate). + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return an Array or Scalar + matching the out_type. It must return a Scalar if + all arguments are scalar, else it must return an Array. + + To define a varargs function, pass a callable that takes + *args. The last in_type will be the type of all varargs + arguments. + function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). 
+ in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "percent rank" + >>> func_doc["description"] = "compute percent rank" + >>> + >>> def list_flatten_udf(ctx, x): + ... return pc.list_flatten(x) + >>> + >>> func_name = "list_flatten_udf" + >>> in_types = {"array": pa.list_(pa.int64())} + >>> out_type = pa.int64() + >>> pc.register_vector_function(list_flatten_udf, func_name, func_doc, in_types, out_type) + >>> + >>> answer = pc.call_function(func_name, [pa.array([[1, 2], [3, 4]])]) + >>> answer + + [ + 1, + 2, + 3, + 4 + ] + """ + +class UdfContext: + """ + Per-invocation function context/state. + + This object will always be the first argument to a user-defined + function. It should not be used outside of a call to the function. + """ + + @property + def batch_length(self) -> int: + """ + The common length of all input arguments (int). + + In the case that all arguments are scalars, this value + is used to pass the "actual length" of the arguments, + e.g. because the scalar values are encoding a column + with a constant value. + """ + @property + def memory_pool(self) -> lib.MemoryPool: + """ + A memory pool for allocations (:class:`MemoryPool`). + + This is the memory pool supplied by the user when they invoked + the function and it should be used in any calls to arrow that the + UDF makes if that call accepts a memory_pool. + """ + +# ==================== _compute.pyx Expression ==================== +class Expression(lib._Weakrefable): + """ + A logical expression to be evaluated against some input. + + To create an expression: + + - Use the factory function ``pyarrow.compute.scalar()`` to create a + scalar (not necessary when combined, see example below). + - Use the factory function ``pyarrow.compute.field()`` to reference + a field (column in table). + - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``. + - Combine expressions using python operators ``&`` (logical and), + ``|`` (logical or) and ``~`` (logical not). + Note: python keywords ``and``, ``or`` and ``not`` cannot be used + to combine expressions. + - Create expression predicates using Expression methods such as + ``pyarrow.compute.Expression.isin()``. + + Examples + -------- + + >>> import pyarrow.compute as pc + >>> (pc.field("a") < pc.scalar(3)) | (pc.field("b") > 7) + 7))> + >>> pc.field("a") != 3 + + >>> pc.field("a").isin([1, 2, 3]) + + """ + + @staticmethod + def from_substrait(buffer: bytes | lib.Buffer) -> Expression: + """ + Deserialize an expression from Substrait + + The serialized message must be an ExtendedExpression message that has + only a single expression. The name of the expression and the schema + the expression was bound to will be ignored. Use + pyarrow.substrait.deserialize_expressions if this information is needed + or if the message might contain multiple expressions. 
+ + Parameters + ---------- + message : bytes or Buffer or a protobuf Message + The Substrait message to deserialize + + Returns + ------- + Expression + The deserialized expression + """ + def to_substrait(self, schema: lib.Schema, allow_arrow_extensions: bool = False) -> lib.Buffer: + """ + Serialize the expression using Substrait + + The expression will be serialized as an ExtendedExpression message that has a + single expression named "expression" + + Parameters + ---------- + schema : Schema + The input schema the expression will be bound to + allow_arrow_extensions : bool, default False + If False then only functions that are part of the core Substrait function + definitions will be allowed. Set this to True to allow pyarrow-specific functions + but the result may not be accepted by other compute libraries. + + Returns + ------- + Buffer + A buffer containing the serialized Protobuf plan. + """ + def __invert__(self) -> Expression: ... + def __and__(self, other) -> Expression: ... + def __or__(self, other) -> Expression: ... + def __add__(self, other) -> Expression: ... + def __mul__(self, other) -> Expression: ... + def __sub__(self, other) -> Expression: ... + def __eq__(self, value: object) -> Expression: ... # type: ignore[override] + def __ne__(self, value: object) -> Expression: ... # type: ignore[override] + def __gt__(self, value: object) -> Expression: ... # type: ignore[override] + def __lt__(self, value: object) -> Expression: ... # type: ignore[override] + def __ge__(self, value: object) -> Expression: ... # type: ignore[override] + def __le__(self, value: object) -> Expression: ... # type: ignore[override] + def __truediv__(self, other) -> Expression: ... + def is_valid(self) -> bool: + """ + Check whether the expression is not-null (valid). + + This creates a new expression equivalent to calling the + `is_valid` compute function on this expression. + + Returns + ------- + is_valid : Expression + """ + def is_null(self, nan_is_null: bool = False) -> Expression: + """ + Check whether the expression is null. + + This creates a new expression equivalent to calling the + `is_null` compute function on this expression. + + Parameters + ---------- + nan_is_null : boolean, default False + Whether floating-point NaNs are considered null. + + Returns + ------- + is_null : Expression + """ + def is_nan(self) -> Expression: + """ + Check whether the expression is NaN. + + This creates a new expression equivalent to calling the + `is_nan` compute function on this expression. + + Returns + ------- + is_nan : Expression + """ + def cast( + self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None + ) -> Expression: + """ + Explicitly set or change the expression's data type. + + This creates a new expression equivalent to calling the + `cast` compute function on this expression. + + Parameters + ---------- + type : DataType, default None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Expression + """ + def isin(self, values: lib.Array | Iterable) -> Expression: + """ + Check whether the expression is contained in values. + + This creates a new expression equivalent to calling the + `is_in` compute function on this expression. + + Parameters + ---------- + values : Array or iterable + The values to check for. 
+ + Returns + ------- + isin : Expression + A new expression that, when evaluated, checks whether + this expression's value is contained in `values`. + """ + +# ==================== _compute.py ==================== diff --git a/python/stubs/_csv.pyi b/python/stubs/_csv.pyi new file mode 100644 index 00000000000..2f49f8c9a6c --- /dev/null +++ b/python/stubs/_csv.pyi @@ -0,0 +1,641 @@ +from dataclasses import dataclass, field +from typing import IO, Any, Callable, Literal + +from _typeshed import StrPath + +from . import lib + +@dataclass(kw_only=True) +class ReadOptions(lib._Weakrefable): + """ + Options for reading CSV files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual record batches or table chunks. + Minimum valid value for block size is 1 + skip_rows : int, optional (default 0) + The number of rows to skip before the column names (if any) + and the CSV data. + skip_rows_after_names : int, optional (default 0) + The number of rows to skip after the column names. + This number can be larger than the number of rows in one + block, and empty rows are counted. + The order of application is as follows: + - `skip_rows` is applied (if non-zero); + - column names are read (unless `column_names` is set); + - `skip_rows_after_names` is applied (if non-zero). + column_names : list, optional + The column names of the target table. If empty, fall back on + `autogenerate_column_names`. + autogenerate_column_names : bool, optional (default False) + Whether to autogenerate column names if `column_names` is empty. + If true, column names will be of the form "f0", "f1"... + If false, column names will be read from the first CSV row + after `skip_rows`. + encoding : str, optional (default 'utf8') + The character encoding of the CSV data. Columns that cannot + decode using this encoding can still be read as Binary. 
+ + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = "1,2,3\\nFlamingo,2,2022-03-01\\nHorse,4,2022-03-02\\nBrittle stars,5,2022-03-03\\nCentipede,100,2022-03-04" + >>> print(s) + 1,2,3 + Flamingo,2,2022-03-01 + Horse,4,2022-03-02 + Brittle stars,5,2022-03-03 + Centipede,100,2022-03-04 + + Ignore the first numbered row and substitute it with defined + or autogenerated column names: + + >>> from pyarrow import csv + >>> read_options = csv.ReadOptions(column_names=["animals", "n_legs", "entry"], skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + >>> read_options = csv.ReadOptions(autogenerate_column_names=True, skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + f0: string + f1: int64 + f2: date32[day] + ---- + f0: [["Flamingo","Horse","Brittle stars","Centipede"]] + f1: [[2,4,5,100]] + f2: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + Remove the first 2 rows of the data: + + >>> read_options = csv.ReadOptions(skip_rows_after_names=2) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + 1: string + 2: int64 + 3: date32[day] + ---- + 1: [["Brittle stars","Centipede"]] + 2: [[5,100]] + 3: [[2022-03-03,2022-03-04]] + """ + + use_threads: bool = field(default=True, kw_only=False) + block_size: int | None = None + skip_rows: int = 0 + skip_rows_after_names: int = 0 + column_names: list[str] | None = None + autogenerate_column_names: bool = False + encoding: str = "utf8" + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ParseOptions(lib._Weakrefable): + """ + Options for parsing CSV files. + + Parameters + ---------- + delimiter : 1-character string, optional (default ',') + The character delimiting individual cells in the CSV data. + quote_char : 1-character string or False, optional (default '"') + The character used optionally for quoting CSV values + (False if quoting is not allowed). + double_quote : bool, optional (default True) + Whether two quotes in a quoted CSV value denote a single quote + in the data. + escape_char : 1-character string or False, optional (default False) + The character used optionally for escaping special characters + (False if escaping is not allowed). + newlines_in_values : bool, optional (default False) + Whether newline characters are allowed in CSV values. + Setting this to True reduces the performance of multi-threaded + CSV reading. + ignore_empty_lines : bool, optional (default True) + Whether empty lines are ignored in CSV input. + If False, an empty line is interpreted as containing a single empty + value (assuming a one-column CSV file). + invalid_row_handler : callable, optional (default None) + If not None, this object is called for each CSV row that fails + parsing (because of a mismatching number of columns). + It should accept a single InvalidRow argument and return either + "skip" or "error" depending on the desired outcome. + + Examples + -------- + + Defining an example file from bytes object: + + >>> import io + >>> s = ( + ... "animals;n_legs;entry\\n" + ... "Flamingo;2;2022-03-01\\n" + ... "# Comment here:\\n" + ... "Horse;4;2022-03-02\\n" + ... "Brittle stars;5;2022-03-03\\n" + ... "Centipede;100;2022-03-04" + ... 
) + >>> print(s) + animals;n_legs;entry + Flamingo;2;2022-03-01 + # Comment here: + Horse;4;2022-03-02 + Brittle stars;5;2022-03-03 + Centipede;100;2022-03-04 + >>> source = io.BytesIO(s.encode()) + + Read the data from a file skipping rows with comments + and defining the delimiter: + + >>> from pyarrow import csv + >>> def skip_comment(row): + ... if row.text.startswith("# "): + ... return "skip" + ... else: + ... return "error" + >>> parse_options = csv.ParseOptions(delimiter=";", invalid_row_handler=skip_comment) + >>> csv.read_csv(source, parse_options=parse_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + """ + + delimiter: str = field(default=",", kw_only=False) + quote_char: str | Literal[False] = '"' + double_quote: bool = True + escape_char: str | Literal[False] = False + newlines_in_values: bool = False + ignore_empty_lines: bool = True + invalid_row_handler: Callable[[InvalidRow], Literal["skip", "error"]] | None = None + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ConvertOptions(lib._Weakrefable): + """ + Options for converting CSV data. + + Parameters + ---------- + check_utf8 : bool, optional (default True) + Whether to check UTF8 validity of string columns. + column_types : pyarrow.Schema or dict, optional + Explicitly map column names to column types. Passing this argument + disables type inference on the defined columns. + null_values : list, optional + A sequence of strings that denote nulls in the data + (defaults are appropriate in most cases). Note that by default, + string columns are not checked for null values. To enable + null checking for those, specify ``strings_can_be_null=True``. + true_values : list, optional + A sequence of strings that denote true booleans in the data + (defaults are appropriate in most cases). + false_values : list, optional + A sequence of strings that denote false booleans in the data + (defaults are appropriate in most cases). + decimal_point : 1-character string, optional (default '.') + The character used as decimal point in floating-point and decimal + data. + strings_can_be_null : bool, optional (default False) + Whether string / binary columns can have null values. + If true, then strings in null_values are considered null for + string columns. + If false, then all strings are valid string values. + quoted_strings_can_be_null : bool, optional (default True) + Whether quoted values can be null. + If true, then strings in "null_values" are also considered null + when they appear quoted in the CSV file. Otherwise, quoted values + are never considered null. + include_columns : list, optional + The names of columns to include in the Table. + If empty, the Table will include all columns from the CSV file. + If not empty, only these columns will be included, in this order. + include_missing_columns : bool, optional (default False) + If false, columns in `include_columns` but not in the CSV file will + error out. + If true, columns in `include_columns` but not in the CSV file will + produce a column of nulls (whose type is selected using + `column_types`, or null by default). + This option is ignored if `include_columns` is empty. + auto_dict_encode : bool, optional (default False) + Whether to try to automatically dict-encode string / binary data. 
+ If true, then when type inference detects a string or binary column, + it it dict-encoded up to `auto_dict_max_cardinality` distinct values + (per chunk), after which it switches to regular encoding. + This setting is ignored for non-inferred columns (those in + `column_types`). + auto_dict_max_cardinality : int, optional + The maximum dictionary cardinality for `auto_dict_encode`. + This value is per chunk. + timestamp_parsers : list, optional + A sequence of strptime()-compatible format strings, tried in order + when attempting to infer or convert timestamp values (the special + value ISO8601() can also be given). By default, a fast built-in + ISO-8601 parser is used. + + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = ( + ... "animals,n_legs,entry,fast\\n" + ... "Flamingo,2,01/03/2022,Yes\\n" + ... "Horse,4,02/03/2022,Yes\\n" + ... "Brittle stars,5,03/03/2022,No\\n" + ... "Centipede,100,04/03/2022,No\\n" + ... ",6,05/03/2022," + ... ) + >>> print(s) + animals,n_legs,entry,fast + Flamingo,2,01/03/2022,Yes + Horse,4,02/03/2022,Yes + Brittle stars,5,03/03/2022,No + Centipede,100,04/03/2022,No + ,6,05/03/2022, + + Change the type of a column: + + >>> import pyarrow as pa + >>> from pyarrow import csv + >>> convert_options = csv.ConvertOptions(column_types={"n_legs": pa.float64()}) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: double + entry: string + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]] + fast: [["Yes","Yes","No","No",""]] + + Define a date parsing format to get a timestamp type column + (in case dates are not in ISO format and not converted by default): + + >>> convert_options = csv.ConvertOptions(timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: timestamp[s] + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [["Yes","Yes","No","No",""]] + + Specify a subset of columns to be read: + + >>> convert_options = csv.ConvertOptions(include_columns=["animals", "n_legs"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + + List additional column to be included as a null typed column: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs", "location"], include_missing_columns=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + location: null + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + location: [5 nulls] + + Define columns as dictionary type (by default only the + string/binary columns are dictionary encoded): + + >>> convert_options = csv.ConvertOptions( + ... timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"], auto_dict_encode=True + ... 
) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: dictionary + n_legs: int64 + entry: timestamp[s] + fast: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Horse","Brittle stars","Centipede",""] -- indices: + [0,1,2,3,4]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [ -- dictionary: + ["Yes","No",""] -- indices: + [0,0,1,1,2]] + + Set upper limit for the number of categories. If the categories + is more than the limit, the conversion to dictionary will not + happen: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals"], auto_dict_encode=True, auto_dict_max_cardinality=2 + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + + Set empty strings to missing values: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs"], strings_can_be_null=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",null]] + n_legs: [[2,4,5,100,6]] + + Define values to be True and False when converting a column + into a bool type: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["fast"], false_values=["No"], true_values=["Yes"] + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + fast: bool + ---- + fast: [[true,true,false,false,null]] + """ + + check_utf8: bool = field(default=True, kw_only=False) + column_types: lib.Schema | dict | None = None + null_values: list[str] | None = None + true_values: list[str] | None = None + false_values: list[str] | None = None + decimal_point: str = "." + strings_can_be_null: bool = False + quoted_strings_can_be_null: bool = True + include_columns: list[str] | None = None + include_missing_columns: bool = False + auto_dict_encode: bool = False + auto_dict_max_cardinality: int | None = None + timestamp_parsers: list[str] | None = None + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class WriteOptions(lib._Weakrefable): + """ + Options for writing CSV files. + + Parameters + ---------- + include_header : bool, optional (default True) + Whether to write an initial header line with column names + batch_size : int, optional (default 1024) + How many rows to process together when converting and writing + CSV data + delimiter : 1-character string, optional (default ",") + The character delimiting individual cells in the CSV data. + quoting_style : str, optional (default "needed") + Whether to quote values, and if so, which quoting style to use. + The following values are accepted: + + - "needed" (default): only enclose values in quotes when needed. + - "all_valid": enclose all valid values in quotes; nulls are not quoted. + - "none": do not enclose any values in quotes; values containing + special characters (such as quotes, cell delimiters or line endings) + will raise an error. + """ + + include_header: bool = field(default=True, kw_only=False) + batch_size: int = 1024 + delimiter: str = "," + quoting_style: Literal["needed", "all_valid", "none"] = "needed" + + def validate(self) -> None: ... 
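+
+# Illustrative usage (a minimal sketch; the table contents and delimiter are
+# arbitrary examples): WriteOptions and ParseOptions combine for a round trip
+# through an in-memory buffer, for example:
+#
+# >>> import io
+# >>> import pyarrow as pa
+# >>> from pyarrow import csv
+# >>> table = pa.table({"animals": ["Flamingo", "Horse"], "n_legs": [2, 4]})
+# >>> sink = io.BytesIO()
+# >>> csv.write_csv(table, sink, write_options=csv.WriteOptions(delimiter=";"))
+# >>> csv.read_csv(io.BytesIO(sink.getvalue()), parse_options=csv.ParseOptions(delimiter=";"))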
+ +@dataclass +class InvalidRow(lib._Weakrefable): + """ + Description of an invalid row in a CSV file. + + Parameters + ---------- + expected_columns : int + The expected number of columns in the row. + actual_columns : int + The actual number of columns in the row. + number : int or None + The physical row number if known, otherwise None. + text : str + The contents of the row. + """ + + expected_columns: int + actual_columns: int + number: int | None + text: str + +class CSVWriter(lib._CRecordBatchWriter): + """ + Writer to create a CSV file. + + Parameters + ---------- + sink : str, path, pyarrow.OutputStream or file-like object + The location where to write the CSV data. + schema : pyarrow.Schema + The schema of the data to be written. + write_options : pyarrow.csv.WriteOptions + Options to configure writing the CSV data. + memory_pool : MemoryPool, optional + Pool for temporary allocations. + """ + + def __init__( + self, + # TODO: OutputStream + sink: StrPath | IO[Any], + schema: lib.Schema, + write_options: WriteOptions | None = None, + *, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class CSVStreamingReader(lib.RecordBatchReader): ... + +ISO8601: lib._Weakrefable + +def open_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> CSVStreamingReader: + """ + Open a streaming reader of CSV data. + + Reading using this function is always single-threaded. + + Parameters + ---------- + input_file : string, path or file-like object + The location of CSV data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.csv.ReadOptions, optional + Options for the CSV reader (see pyarrow.csv.ReadOptions constructor + for defaults) + parse_options : pyarrow.csv.ParseOptions, optional + Options for the CSV parser + (see pyarrow.csv.ParseOptions constructor for defaults) + convert_options : pyarrow.csv.ConvertOptions, optional + Options for converting CSV data + (see pyarrow.csv.ConvertOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from + + Returns + ------- + :class:`pyarrow.csv.CSVStreamingReader` + """ + +def read_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Table: + """ + Read a Table from a stream of CSV data. + + Parameters + ---------- + input_file : string, path or file-like object + The location of CSV data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. 
+ read_options : pyarrow.csv.ReadOptions, optional + Options for the CSV reader (see pyarrow.csv.ReadOptions constructor + for defaults) + parse_options : pyarrow.csv.ParseOptions, optional + Options for the CSV parser + (see pyarrow.csv.ParseOptions constructor for defaults) + convert_options : pyarrow.csv.ConvertOptions, optional + Options for converting CSV data + (see pyarrow.csv.ConvertOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate Table memory from + + Returns + ------- + :class:`pyarrow.Table` + Contents of the CSV file as a in-memory table. + + Examples + -------- + + Defining an example file from bytes object: + + >>> import io + >>> s = ( + ... "animals,n_legs,entry\\n" + ... "Flamingo,2,2022-03-01\\n" + ... "Horse,4,2022-03-02\\n" + ... "Brittle stars,5,2022-03-03\\n" + ... "Centipede,100,2022-03-04" + ... ) + >>> print(s) + animals,n_legs,entry + Flamingo,2,2022-03-01 + Horse,4,2022-03-02 + Brittle stars,5,2022-03-03 + Centipede,100,2022-03-04 + >>> source = io.BytesIO(s.encode()) + + Reading from the file + + >>> from pyarrow import csv + >>> csv.read_csv(source) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + """ + +def write_csv( + data: lib.RecordBatch | lib.Table, + output_file: StrPath | lib.NativeFile | IO[Any], + write_options: WriteOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> None: + """ + Write record batch or table to a CSV file. + + Parameters + ---------- + data : pyarrow.RecordBatch or pyarrow.Table + The data to write. + output_file : string, path, pyarrow.NativeFile, or file-like object + The location where to write the CSV data. + write_options : pyarrow.csv.WriteOptions + Options to configure writing the CSV data. + memory_pool : MemoryPool, optional + Pool for temporary allocations. + + Examples + -------- + + >>> import pyarrow as pa + >>> from pyarrow import csv + + >>> legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> entry_date = pa.array(["01/03/2022", "02/03/2022", "03/03/2022", "04/03/2022"]) + >>> table = pa.table([animals, legs, entry_date], names=["animals", "n_legs", "entry"]) + + >>> csv.write_csv(table, "animals.csv") + + >>> write_options = csv.WriteOptions(include_header=False) + >>> csv.write_csv(table, "animals.csv", write_options=write_options) + + >>> write_options = csv.WriteOptions(delimiter=";") + >>> csv.write_csv(table, "animals.csv", write_options=write_options) + """ diff --git a/python/stubs/_cuda.pyi b/python/stubs/_cuda.pyi new file mode 100644 index 00000000000..ad52b2f380f --- /dev/null +++ b/python/stubs/_cuda.pyi @@ -0,0 +1,556 @@ +from typing import Any + +import cuda # type: ignore[import-not-found] + +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] + +from . import lib +from ._stubs_typing import ArrayLike + +class Context(lib._Weakrefable): + """ + CUDA driver context. + """ + + def __init__(self, device_number: int = 0, handle: int | None = None) -> None: + """ + Create a CUDA driver context for a particular device. + + If a CUDA context handle is passed, it is wrapped, otherwise + a default CUDA context for the given device is requested. + + Parameters + ---------- + device_number : int (default 0) + Specify the GPU device for which the CUDA driver context is + requested. 
+ handle : int, optional + Specify CUDA handle for a shared context that has been created + by another library. + """ + @staticmethod + def from_numba(context: _numba_driver.Context | None = None) -> Context: + """ + Create a Context instance from a Numba CUDA context. + + Parameters + ---------- + context : {numba.cuda.cudadrv.driver.Context, None} + A Numba CUDA context instance. + If None, the current Numba context is used. + + Returns + ------- + shared_context : pyarrow.cuda.Context + Context instance. + """ + def to_numba(self) -> _numba_driver.Context: + """ + Convert Context to a Numba CUDA context. + + Returns + ------- + context : numba.cuda.cudadrv.driver.Context + Numba CUDA context instance. + """ + @staticmethod + def get_num_devices() -> int: + """Return the number of GPU devices.""" + @property + def device_number(self) -> int: + """Return context device number.""" + @property + def handle(self) -> int: + """Return pointer to context handle.""" + def synchronize(self) -> None: + """Blocks until the device has completed all preceding requested + tasks. + """ + @property + def bytes_allocated(self) -> int: + """Return the number of allocated bytes.""" + def get_device_address(self, address: int) -> int: + """Return the device address that is reachable from kernels running in + the context + + Parameters + ---------- + address : int + Specify memory address value + + Returns + ------- + device_address : int + Device address accessible from device context + + Notes + ----- + The device address is defined as a memory address accessible + by device. While it is often a device memory address but it + can be also a host memory address, for instance, when the + memory is allocated as host memory (using cudaMallocHost or + cudaHostAlloc) or as managed memory (using cudaMallocManaged) + or the host memory is page-locked (using cudaHostRegister). + """ + def new_buffer(self, nbytes: int) -> CudaBuffer: + """Return new device buffer. + + Parameters + ---------- + nbytes : int + Specify the number of bytes to be allocated. + + Returns + ------- + buf : CudaBuffer + Allocated buffer. + """ + @property + def memory_manager(self) -> lib.MemoryManager: + """ + The default memory manager tied to this context's device. + + Returns + ------- + MemoryManager + """ + @property + def device(self) -> lib.Device: + """ + The device instance associated with this context. + + Returns + ------- + Device + """ + def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: + """ + Create device buffer from address and size as a view. + + The caller is responsible for allocating and freeing the + memory. When `address==size==0` then a new zero-sized buffer + is returned. + + Parameters + ---------- + address : int + Specify the starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + Specify the size of device buffer in bytes. + base : {None, object} + Specify object that owns the referenced memory. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device reachable memory. + + """ + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: + """Open existing CUDA IPC memory handle + + Parameters + ---------- + ipc_handle : IpcMemHandle + Specify opaque pointer to CUipcMemHandle (driver API). 
+ + Returns + ------- + buf : CudaBuffer + referencing device buffer + """ + def buffer_from_data( + self, + data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, + offset: int = 0, + size: int = -1, + ) -> CudaBuffer: + """Create device buffer and initialize with data. + + Parameters + ---------- + data : {CudaBuffer, HostBuffer, Buffer, array-like} + Specify data to be copied to device buffer. + offset : int + Specify the offset of input buffer for device data + buffering. Default: 0. + size : int + Specify the size of device buffer in bytes. Default: all + (starting from input offset) + + Returns + ------- + cbuf : CudaBuffer + Device buffer with copied data. + """ + def buffer_from_object(self, obj: Any) -> CudaBuffer: + """Create device buffer view of arbitrary object that references + device accessible memory. + + When the object contains a non-contiguous view of device + accessible memory then the returned device buffer will contain + contiguous view of the memory, that is, including the + intermediate data that is otherwise invisible to the input + object. + + Parameters + ---------- + obj : {object, Buffer, HostBuffer, CudaBuffer, ...} + Specify an object that holds (device or host) address that + can be accessed from device. This includes objects with + types defined in pyarrow.cuda as well as arbitrary objects + that implement the CUDA array interface as defined by numba. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device accessible memory. + + """ + +class IpcMemHandle(lib._Weakrefable): + """A serializable container for a CUDA IPC handle.""" + @staticmethod + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: + """Create IpcMemHandle from opaque buffer (e.g. from another + process) + + Parameters + ---------- + opaque_handle : + a CUipcMemHandle as a const void* + + Returns + ------- + ipc_handle : IpcMemHandle + """ + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: + """Write IpcMemHandle to a Buffer + + Parameters + ---------- + pool : {MemoryPool, None} + Specify a pool to allocate memory from + + Returns + ------- + buf : Buffer + The serialized buffer. + """ + +class CudaBuffer(lib.Buffer): + """An Arrow buffer with data located in a GPU device. + + To create a CudaBuffer instance, use Context.device_buffer(). + + The memory allocated in a CudaBuffer is freed when the buffer object + is deleted. + """ + + @staticmethod + def from_buffer(buf: lib.Buffer) -> CudaBuffer: + """Convert back generic buffer into CudaBuffer + + Parameters + ---------- + buf : Buffer + Specify buffer containing CudaBuffer + + Returns + ------- + dbuf : CudaBuffer + Resulting device buffer. + """ + @staticmethod + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: + """Create a CudaBuffer view from numba MemoryPointer instance. + + Parameters + ---------- + mem : numba.cuda.cudadrv.driver.MemoryPointer + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of numba MemoryPointer. + """ + def to_numba(self) -> _numba_driver.MemoryPointer: + """Return numba memory pointer of CudaBuffer instance.""" + def copy_to_host( + self, + position: int = 0, + nbytes: int = -1, + buf: lib.Buffer | None = None, + memory_pool: lib.MemoryPool | None = None, + resizable: bool = False, + ) -> lib.Buffer: + """Copy memory from GPU device to CPU host + + Caller is responsible for ensuring that all tasks affecting + the memory are finished. Use + + `.context.synchronize()` + + when needed. 
+ + Parameters + ---------- + position : int + Specify the starting position of the source data in GPU + device buffer. Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + the position until host buffer is full). + buf : Buffer + Specify a pre-allocated output buffer in host. Default: None + (allocate new output buffer). + memory_pool : MemoryPool + resizable : bool + Specify extra arguments to allocate_buffer. Used only when + buf is None. + + Returns + ------- + buf : Buffer + Output buffer in host. + + """ + def copy_from_host( + self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + ) -> int: + """Copy data from host to device. + + The device buffer must be pre-allocated. + + Parameters + ---------- + data : {Buffer, array-like} + Specify data in host. It can be array-like that is valid + argument to py_buffer + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + """ + def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: + """Copy data from device to device. + + Parameters + ---------- + buf : CudaBuffer + Specify source device buffer. + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + + """ + def export_for_ipc(self) -> IpcMemHandle: + """ + Expose this device buffer as IPC memory which can be used in other + processes. + + After calling this function, this device memory will not be + freed when the CudaBuffer is destructed. + + Returns + ------- + ipc_handle : IpcMemHandle + The exported IPC handle + + """ + @property + def context(self) -> Context: + """Returns the CUDA driver context of this buffer.""" + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: + """Return slice of device buffer + + Parameters + ---------- + offset : int, default 0 + Specify offset from the start of device buffer to slice + length : int, default None + Specify the length of slice (default is until end of device + buffer starting from offset). If the length is larger than + the data available, the returned slice will have a size of + the available data starting from the offset. + + Returns + ------- + sliced : CudaBuffer + Zero-copy slice of device buffer. + + """ + def to_pybytes(self) -> bytes: + """Return device buffer content as Python bytes.""" + +class HostBuffer(lib.Buffer): + """Device-accessible CPU memory created using cudaHostAlloc. + + To create a HostBuffer instance, use + + cuda.new_host_buffer() + """ + @property + def size(self) -> int: ... + +class BufferReader(lib.NativeFile): + """File interface for zero-copy read from CUDA buffers. + + Note: Read methods return pointers to device memory. This means + you must be careful using this interface with any Arrow code which + may expect to be able to do anything other than pointer arithmetic + on the returned buffers. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: + """Return a slice view of the underlying device buffer. 
+
+        The slice will start at the current reader position and will
+        have the specified size in bytes.
+
+        Parameters
+        ----------
+        nbytes : int, default None
+            Specify the number of bytes to read. Default: None (read all
+            remaining bytes).
+
+        Returns
+        -------
+        cbuf : CudaBuffer
+            New device buffer.
+
+        """
+
+class BufferWriter(lib.NativeFile):
+    """File interface for writing to CUDA buffers.
+
+    By default writes are unbuffered. Use set_buffer_size to enable
+    buffering.
+    """
+    def __init__(self, obj: CudaBuffer) -> None: ...
+    def writeat(self, position: int, data: ArrayLike) -> None:
+        """Write data to buffer starting from position.
+
+        Parameters
+        ----------
+        position : int
+            Specify device buffer position where the data will be
+            written.
+        data : array-like
+            Specify data, the data instance must implement buffer
+            protocol.
+        """
+    @property
+    def buffer_size(self) -> int:
+        """Returns size of host (CPU) buffer, 0 for unbuffered"""
+    @buffer_size.setter
+    def buffer_size(self, buffer_size: int) -> None:
+        """Set CPU buffer size to limit calls to cudaMemcpy
+
+        Parameters
+        ----------
+        buffer_size : int
+            Specify the size of CPU buffer to allocate in bytes.
+        """
+    @property
+    def num_bytes_buffered(self) -> int:
+        """Returns number of bytes buffered on host"""
+
+def new_host_buffer(size: int, device: int = 0) -> HostBuffer:
+    """Return buffer with CUDA-accessible memory on CPU host
+
+    Parameters
+    ----------
+    size : int
+        Specify the number of bytes to be allocated.
+    device : int
+        Specify GPU device number.
+
+    Returns
+    -------
+    dbuf : HostBuffer
+        Allocated host buffer
+    """
+
+def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer:
+    """Write record batch message to GPU device memory
+
+    Parameters
+    ----------
+    batch : RecordBatch
+        Record batch to write
+    ctx : Context
+        CUDA Context to allocate device memory from
+
+    Returns
+    -------
+    dbuf : CudaBuffer
+        Device buffer which contains the record batch message
+    """
+
+def read_message(
+    source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryPool | None = None
+) -> lib.Message:
+    """Read Arrow IPC message located on GPU device
+
+    Parameters
+    ----------
+    source : {CudaBuffer, cuda.BufferReader}
+        Device buffer or reader of device buffer.
+    pool : MemoryPool (optional)
+        Pool to allocate CPU memory for the metadata
+
+    Returns
+    -------
+    message : Message
+        The deserialized message, body still on device
+    """
+
+def read_record_batch(
+    buffer: lib.Buffer,
+    schema: lib.Schema,
+    *,
+    dictionary_memo: lib.DictionaryMemo | None = None,
+    pool: lib.MemoryPool | None = None,
+) -> lib.RecordBatch:
+    """Construct RecordBatch referencing IPC message located on CUDA device.
+
+    While the metadata is copied to host memory for deserialization,
+    the record batch data remains on the device.
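+
+    A minimal sketch of a round trip, assuming ``ctx`` is an existing
+    ``pyarrow.cuda.Context`` and ``batch`` is a host-side ``pyarrow.RecordBatch``::
+
+        dbuf = serialize_record_batch(batch, ctx)        # copy the IPC message to device memory
+        result = read_record_batch(dbuf, batch.schema)   # rebuild the batch; data stays on device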
+ + Parameters + ---------- + buffer : + Device buffer containing the complete IPC message + schema : Schema + The schema for the record batch + dictionary_memo : DictionaryMemo, optional + If message contains dictionaries, must pass a populated + DictionaryMemo + pool : MemoryPool (optional) + Pool to allocate metadata from + + Returns + ------- + batch : RecordBatch + Reconstructed record batch, with device pointers + + """ diff --git a/python/stubs/_dataset.pyi b/python/stubs/_dataset.pyi new file mode 100644 index 00000000000..af864f9154b --- /dev/null +++ b/python/stubs/_dataset.pyi @@ -0,0 +1,2299 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + IO, + Any, + Callable, + Generic, + Iterator, + Literal, + NamedTuple, + TypeVar, + overload, +) + +from _typeshed import StrPath + +from . import _csv, _json, _parquet, lib +from ._fs import FileSelector, FileSystem, SupportedFileSystem +from ._stubs_typing import Indices, JoinType, Order +from .acero import ExecNodeOptions +from .compute import Expression +from .ipc import IpcWriteOptions, RecordBatchReader + +class Dataset(lib._Weakrefable): + """ + Collection of data fragments and potentially child datasets. + + Arrow Datasets allow you to query against data that has been split across + multiple files. This sharding of data may indicate partitioning, which + can accelerate queries that only touch some partitions (files). + """ + + @property + def partition_expression(self) -> Expression: + """ + An Expression which evaluates to true for all data viewed by this + Dataset. + """ + def replace_schema(self, schema: lib.Schema) -> None: + """ + Return a copy of this Dataset with a different schema. + + The copy will view the same Fragments. If the new schema is not + compatible with the original dataset's schema then an error will + be raised. + + Parameters + ---------- + schema : Schema + The new dataset schema. + """ + def get_fragments(self, filter: Expression | None = None): + """Returns an iterator over the fragments in this dataset. + + Parameters + ---------- + filter : Expression, default None + Return fragments matching the optional filter, either using the + partition_expression or internal information like Parquet's + statistics. + + Returns + ------- + fragments : iterator of Fragment + """ + def scanner( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the dataset. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + See the :meth:`Scanner.from_dataset` method for further information. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "dataset_scanner.parquet") + + >>> import pyarrow.dataset as ds + >>> dataset = ds.dataset("dataset_scanner.parquet") + + Selecting a subset of the columns: + + >>> dataset.scanner(columns=["year", "n_legs"]).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2021,2022,2019,2021]] + n_legs: [[2,2,4,4,5,100]] + + Projecting selected columns using an expression: + + >>> dataset.scanner( + ... columns={ + ... "n_legs_uint": ds.field("n_legs").cast("uint8"), + ... } + ... 
).to_table() + pyarrow.Table + n_legs_uint: uint8 + ---- + n_legs_uint: [[2,2,4,4,5,100]] + + Filtering rows while scanning: + + >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2022,2021,2022,2021]] + n_legs: [[2,4,4,100]] + animal: [["Parrot","Dog","Horse","Centipede"]] + """ + def to_batches( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the dataset as materialized record batches. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Read the dataset to an Arrow table. + + Note that this method reads all the selected data from the dataset + into memory. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. 
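+
+        For example, assuming ``dataset`` is an existing :class:`Dataset` with
+        ``year`` and ``n_legs`` columns (as in the scanner examples above)::
+
+            dataset.take([0, 2, 5], columns=["year", "n_legs"])  # returns a pyarrow.Table with three rows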
+ + Parameters + ---------- + indices : Array or array-like + indices of rows to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the dataset. + + Parameters + ---------- + num_rows : int + The number of rows to load. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. 
+ fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + count : int + """ + @property + def schema(self) -> lib.Schema: + """The common schema of the full Dataset""" + def filter(self, expression: Expression) -> Self: + """ + Apply a row filter to the dataset. + + Parameters + ---------- + expression : Expression + The filter that should be applied to the dataset. + + Returns + ------- + Dataset + """ + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: + """ + Sort the Dataset by one or multiple columns. + + Parameters + ---------- + sorting : str or list[tuple(name, order)] + Name of the column to use to sort (ascending), or + a list of multiple sorting conditions where + each entry is a tuple with column name + and sorting order ("ascending" or "descending") + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + InMemoryDataset + A new dataset sorted according to the sort keys. + """ + def join( + self, + right_dataset: Dataset, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> InMemoryDataset: + """ + Perform a join between this dataset and another one. + + Result of the join will be a new dataset, where further + operations can be applied. + + Parameters + ---------- + right_dataset : dataset + The dataset to join to the current one, acting as the right dataset + in the join operation. + keys : str or list[str] + The columns from current dataset that should be used as keys + of the join operation left side. + right_keys : str or list[str], default None + The columns from the right_dataset that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left dataset. + join_type : str, default "left outer" + The kind of join that should be performed, one of + ("left semi", "right semi", "left anti", "right anti", + "inner", "left outer", "right outer", "full outer") + left_suffix : str, default None + Which suffix to add to right column names. This prevents confusion + when the columns in left and right datasets have colliding names. + right_suffix : str, default None + Which suffix to add to the left column names. This prevents confusion + when the columns in left and right datasets have colliding names. + coalesce_keys : bool, default True + If the duplicated keys should be omitted from one of the sides + in the join result. + use_threads : bool, default True + Whenever to use multithreading or not. + + Returns + ------- + InMemoryDataset + """ + def join_asof( + self, + right_dataset: Dataset, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> InMemoryDataset: + """ + Perform an asof join between this dataset and another one. 
+ + This is similar to a left-join except that we match on nearest key rather + than equal keys. Both datasets must be sorted by the key. This type of join + is most useful for time series data that are not perfectly aligned. + + Optionally match on equivalent keys with "by" before searching with "on". + + Result of the join will be a new Dataset, where further + operations can be applied. + + Parameters + ---------- + right_dataset : dataset + The dataset to join to the current one, acting as the right dataset + in the join operation. + on : str + The column from current dataset that should be used as the "on" key + of the join operation left side. + + An inexact match is used on the "on" key, i.e. a row is considered a + match if and only if left_on - tolerance <= right_on <= left_on. + + The input table must be sorted by the "on" key. Must be a single + field of a common type. + + Currently, the "on" key must be an integer, date, or timestamp type. + by : str or list[str] + The columns from current dataset that should be used as the keys + of the join operation left side. The join operation is then done + only for the matches in these columns. + tolerance : int + The tolerance for inexact "on" key matching. A right row is considered + a match with the left row `right.on - left.on <= tolerance`. The + `tolerance` may be: + + - negative, in which case a past-as-of-join occurs; + - or positive, in which case a future-as-of-join occurs; + - or zero, in which case an exact-as-of-join occurs. + + The tolerance is interpreted in the same units as the "on" key. + right_on : str or list[str], default None + The columns from the right_dataset that should be used as the on key + on the join operation right side. + When ``None`` use the same key name as the left dataset. + right_by : str or list[str], default None + The columns from the right_dataset that should be used as by keys + on the join operation right side. + When ``None`` use the same key names as the left dataset. + + Returns + ------- + InMemoryDataset + """ + +class InMemoryDataset(Dataset): + """ + A Dataset wrapping in-memory data. + + Parameters + ---------- + source : RecordBatch, Table, list, tuple + The data for this dataset. Can be a RecordBatch, Table, list of + RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader + If an iterable is provided, the schema must also be provided. + schema : Schema, optional + Only required if passing an iterable as the source + """ + +class UnionDataset(Dataset): + """ + A Dataset wrapping child datasets. + + Children's schemas must agree with the provided schema. + + Parameters + ---------- + schema : Schema + A known schema to conform to. + children : list of Dataset + One or more input children + """ + + @property + def children(self) -> list[Dataset]: ... + +class FileSystemDataset(Dataset): + """ + A Dataset of file fragments. + + A FileSystemDataset is composed of one or more FileFragment. + + Parameters + ---------- + fragments : list[Fragments] + List of fragments to consume. + schema : Schema + The top-level schema of the Dataset. + format : FileFormat + File format of the fragments, currently only ParquetFileFormat, + IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. + filesystem : FileSystem + FileSystem of the fragments. + root_partition : Expression, optional + The top-level partition of the DataDataset. 
+ """ + + def __init__( + self, + fragments: list[Fragment], + schema: lib.Schema, + format: FileFormat, + filesystem: SupportedFileSystem | None = None, + root_partition: Expression | None = None, + ) -> None: ... + @classmethod + def from_paths( + cls, + paths: list[str], + schema: lib.Schema | None = None, + format: FileFormat | None = None, + filesystem: SupportedFileSystem | None = None, + partitions: list[Expression] | None = None, + root_partition: Expression | None = None, + ) -> FileSystemDataset: + """ + A Dataset created from a list of paths on a particular filesystem. + + Parameters + ---------- + paths : list of str + List of file paths to create the fragments from. + schema : Schema + The top-level schema of the DataDataset. + format : FileFormat + File format to create fragments from, currently only + ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. + filesystem : FileSystem + The filesystem which files are from. + partitions : list[Expression], optional + Attach additional partition information for the file paths. + root_partition : Expression, optional + The top-level partition of the DataDataset. + """ + @property + def filesystem(self) -> FileSystem: ... + @property + def partitioning(self) -> Partitioning | None: + """ + The partitioning of the Dataset source, if discovered. + + If the FileSystemDataset is created using the ``dataset()`` factory + function with a partitioning specified, this will return the + finalized Partitioning object from the dataset discovery. In all + other cases, this returns None. + """ + @property + def files(self) -> list[str]: + """List of the files""" + @property + def format(self) -> FileFormat: + """The FileFormat of this source.""" + +class FileWriteOptions(lib._Weakrefable): + @property + def format(self) -> FileFormat: ... + +class FileFormat(lib._Weakrefable): + def inspect( + self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None + ) -> lib.Schema: + """ + Infer the schema of a file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to infer a schema from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + + Returns + ------- + schema : Schema + The schema inferred from the file + """ + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + *, + file_size: int | None = None, + ) -> Fragment: + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. + file_size : int, optional + The size of the file in bytes. Can improve performance with high-latency filesystems + when file size needs to be known before reading. + + Returns + ------- + fragment : Fragment + The file fragment + """ + def make_write_options(self) -> FileWriteOptions: ... + @property + def default_extname(self) -> str: ... + @property + def default_fragment_scan_options(self) -> FragmentScanOptions: ... 
+ @default_fragment_scan_options.setter + def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... + +class Fragment(lib._Weakrefable): + """Fragment of data from a Dataset.""" + @property + def physical_schema(self) -> lib.Schema: + """Return the physical schema of this Fragment. This schema can be + different from the dataset read schema.""" + @property + def partition_expression(self) -> Expression: + """An Expression which evaluates to true for all data viewed by this + Fragment. + """ + def scanner( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the fragment. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + Parameters + ---------- + schema : Schema + Schema to use for scanning. This is used to unify a Fragment to + its Dataset's schema. If not specified this will use the + Fragment's physical schema which might differ for each Fragment. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. 
+ cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + """ + def to_batches( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the fragment as materialized record batches. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Convert this Fragment into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. + + Parameters + ---------- + indices : Array or array-like + The indices of row to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the fragment. + + Parameters + ---------- + num_rows : int + The number of rows to load. 
+ columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. 
Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + count : int + """ + +class FileFragment(Fragment): + """A Fragment representing a data file.""" + + def open(self) -> lib.NativeFile: + """ + Open a NativeFile of the buffer or file viewed by this fragment. + """ + @property + def path(self) -> str: + """ + The path of the data file viewed by this fragment, if it views a + file. If instead it views a buffer, this will be "". + """ + @property + def filesystem(self) -> FileSystem: + """ + The FileSystem containing the data file viewed by this fragment, if + it views a file. If instead it views a buffer, this will be None. + """ + @property + def buffer(self) -> lib.Buffer: + """ + The buffer viewed by this fragment, if it views a buffer. If + instead it views a file, this will be None. + """ + @property + def format(self) -> FileFormat: + """ + The format of the data file viewed by this fragment. + """ + +class FragmentScanOptions(lib._Weakrefable): + """Scan options specific to a particular fragment and scan operation.""" + + @property + def type_name(self) -> str: ... + +class IpcFileWriteOptions(FileWriteOptions): + @property + def write_options(self) -> IpcWriteOptions: ... + @write_options.setter + def write_options(self, write_options: IpcWriteOptions) -> None: ... + +class IpcFileFormat(FileFormat): + def equals(self, other: IpcFileFormat) -> bool: ... + def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... + @property + def default_extname(self) -> str: ... + +class FeatherFileFormat(IpcFileFormat): ... + +class CsvFileFormat(FileFormat): + """ + FileFormat for CSV files. + + Parameters + ---------- + parse_options : pyarrow.csv.ParseOptions + Options regarding CSV parsing. + default_fragment_scan_options : CsvFragmentScanOptions + Default options for fragments scan. + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. + read_options : pyarrow.csv.ReadOptions + General read options. + """ + def __init__( + self, + parse_options: _csv.ParseOptions | None = None, + default_fragment_scan_options: CsvFragmentScanOptions | None = None, + convert_options: _csv.ConvertOptions | None = None, + read_options: _csv.ReadOptions | None = None, + ) -> None: ... + def make_write_options(self) -> _csv.WriteOptions: ... # type: ignore[override] + @property + def parse_options(self) -> _csv.ParseOptions: ... + @parse_options.setter + def parse_options(self, parse_options: _csv.ParseOptions) -> None: ... + def equals(self, other: CsvFileFormat) -> bool: ... + +class CsvFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for CSV fragments. + + Parameters + ---------- + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. 
+ read_options : pyarrow.csv.ReadOptions + General read options. + """ + + convert_options: _csv.ConvertOptions + read_options: _csv.ReadOptions + + def __init__( + self, convert_options: _csv.ConvertOptions, read_options: _csv.ReadOptions + ) -> None: ... + def equals(self, other: CsvFragmentScanOptions) -> bool: ... + +class CsvFileWriteOptions(FileWriteOptions): + write_options: _csv.WriteOptions + +class JsonFileFormat(FileFormat): + """ + FileFormat for JSON files. + + Parameters + ---------- + default_fragment_scan_options : JsonFragmentScanOptions + Default options for fragments scan. + parse_options : pyarrow.json.ParseOptions + Options regarding json parsing. + read_options : pyarrow.json.ReadOptions + General read options. + """ + def __init__( + self, + default_fragment_scan_options: JsonFragmentScanOptions | None = None, + parse_options: _json.ParseOptions | None = None, + read_options: _json.ReadOptions | None = None, + ) -> None: ... + def equals(self, other: JsonFileFormat) -> bool: ... + +class JsonFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for JSON fragments. + + Parameters + ---------- + parse_options : pyarrow.json.ParseOptions + Options regarding JSON parsing. + read_options : pyarrow.json.ReadOptions + General read options. + """ + + parse_options: _json.ParseOptions + read_options: _json.ReadOptions + def __init__( + self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions + ) -> None: ... + def equals(self, other: JsonFragmentScanOptions) -> bool: ... + +class Partitioning(lib._Weakrefable): + def parse(self, path: str) -> Expression: + """ + Parse a path into a partition expression. + + Parameters + ---------- + path : str + + Returns + ------- + pyarrow.dataset.Expression + """ + def format(self, expr: Expression) -> tuple[str, str]: + """ + Convert a filter expression into a tuple of (directory, filename) using + the current partitioning scheme + + Parameters + ---------- + expr : pyarrow.dataset.Expression + + Returns + ------- + tuple[str, str] + + Examples + -------- + + Specify the Schema for paths like "/2009/June": + + >>> import pyarrow as pa + >>> import pyarrow.dataset as ds + >>> import pyarrow.compute as pc + >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) + >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) + ('1862/Jan', '') + """ + @property + def schema(self) -> lib.Schema: + """The arrow Schema attached to the partitioning.""" + +class PartitioningFactory(lib._Weakrefable): + @property + def type_name(self) -> str: ... + +class KeyValuePartitioning(Partitioning): + @property + def dictionaries(self) -> list[lib.Array | None]: + """ + The unique values for each partition field, if available. + + Those values are only available if the Partitioning object was + created through dataset discovery from a PartitioningFactory, or + if the dictionaries were manually specified in the constructor. + If no dictionary field is available, this returns an empty list. + """ + +class DirectoryPartitioning(KeyValuePartitioning): + """ + A Partitioning based on a specified Schema. + + The DirectoryPartitioning expects one segment in the file path for each + field in the schema (all fields are required to be present). + For example given schema the path "/2009/11" would + be parsed to ("year"_ == 2009 and "month"_ == 11). + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. 
+ dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + DirectoryPartitioning + + Examples + -------- + >>> from pyarrow.dataset import DirectoryPartitioning + >>> partitioning = DirectoryPartitioning( + ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) + ... ) + >>> print(partitioning.parse("/2009/11/")) + ((year == 2009) and (month == 11)) + """ + + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a DirectoryPartitioning. + + Parameters + ---------- + field_names : list of str + The names to associate with the values from the subdirectory names. + If schema is given, will be populated from the schema. + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain types. This can be more efficient + when materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + max_partition_dictionary_size : int, default 0 + Synonymous with infer_dictionary for backwards compatibility with + 1.0: setting this to -1 or None is equivalent to passing + infer_dictionary=True. + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + +class HivePartitioning(KeyValuePartitioning): + """ + A Partitioning for "/$key=$value/" nested directories as found in + Apache Hive. + + Multi-level, directory based partitioning scheme originating from + Apache Hive with all data files stored in the leaf directories. Data is + partitioned by static values of a particular column in the schema. + Partition keys are represented in the form $key=$value in directory names. + Field order is ignored, as are missing or unrecognized field names. + + For example, given schema, a possible + path would be "/year=2009/month=11/day=15". + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. + dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. 
+ null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" + If any field is None then this fallback will be used as a label + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + HivePartitioning + + Examples + -------- + >>> from pyarrow.dataset import HivePartitioning + >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) + >>> print(partitioning.parse("/year=2009/month=11/")) + ((year == 2009) and (month == 11)) + + """ + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + null_fallback: str = "__HIVE_DEFAULT_PARTITION__", + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + @staticmethod + def discover( + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + null_fallback="__HIVE_DEFAULT_PARTITION__", + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a HivePartitioning. + + Parameters + ---------- + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain. This can be more efficient when + materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + max_partition_dictionary_size : int, default 0 + Synonymous with infer_dictionary for backwards compatibility with + 1.0: setting this to -1 or None is equivalent to passing + infer_dictionary=True. + null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" + When inferring a schema for partition fields this value will be + replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ + for compatibility with Spark + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + +class FilenamePartitioning(KeyValuePartitioning): + """ + A Partitioning based on a specified Schema. + + The FilenamePartitioning expects one segment in the file name for each + field in the schema (all fields are required to be present) separated + by '_'. For example given schema the name + ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. + dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + FilenamePartitioning + + Examples + -------- + >>> from pyarrow.dataset import FilenamePartitioning + >>> partitioning = FilenamePartitioning( + ... 
pa.schema([("year", pa.int16()), ("month", pa.int8())]) + ... ) + >>> print(partitioning.parse("2009_11_data.parquet")) + ((year == 2009) and (month == 11)) + """ + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a FilenamePartitioning. + + Parameters + ---------- + field_names : list of str + The names to associate with the values from the subdirectory names. + If schema is given, will be populated from the schema. + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain types. This can be more efficient + when materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + +class DatasetFactory(lib._Weakrefable): + """ + DatasetFactory is used to create a Dataset, inspect the Schema + of the fragments contained in it, and declare a partitioning. + """ + + root_partition: Expression + def finish(self, schema: lib.Schema | None = None) -> Dataset: + """ + Create a Dataset using the inspected schema or an explicit schema + (if given). + + Parameters + ---------- + schema : Schema, default None + The schema to conform the source to. If None, the inspected + schema is used. + + Returns + ------- + Dataset + """ + def inspect(self) -> lib.Schema: + """ + Inspect all data fragments and return a common Schema. + + Returns + ------- + Schema + """ + def inspect_schemas(self) -> list[lib.Schema]: ... + +class FileSystemFactoryOptions(lib._Weakrefable): + """ + Influences the discovery of filesystem paths. + + Parameters + ---------- + partition_base_dir : str, optional + For the purposes of applying the partitioning, paths will be + stripped of the partition_base_dir. Files not matching the + partition_base_dir prefix will be skipped for partitioning discovery. + The ignored files will still be part of the Dataset, but will not + have partition information. + partitioning : Partitioning/PartitioningFactory, optional + Apply the Partitioning to every discovered Fragment. See Partitioning or + PartitioningFactory documentation. + exclude_invalid_files : bool, optional (default True) + If True, invalid files will be excluded (file format specific check). + This will incur IO for each files in a serial and single threaded + fashion. Disabling this feature will skip the IO, but unsupported + files may be present in the Dataset (resulting in an error at scan + time). + selector_ignore_prefixes : list, optional + When discovering from a Selector (and not from an explicit file list), + ignore files and directories matching any of these prefixes. + By default this is ['.', '_']. 
+ """ + + partitioning: Partitioning + partitioning_factory: PartitioningFactory + partition_base_dir: str + exclude_invalid_files: bool + selector_ignore_prefixes: list[str] + + def __init__( + self, + artition_base_dir: str | None = None, + partitioning: Partitioning | PartitioningFactory | None = None, + exclude_invalid_files: bool = True, + selector_ignore_prefixes: list[str] | None = None, + ) -> None: ... + +class FileSystemDatasetFactory(DatasetFactory): + """ + Create a DatasetFactory from a list of paths with schema inspection. + + Parameters + ---------- + filesystem : pyarrow.fs.FileSystem + Filesystem to discover. + paths_or_selector : pyarrow.fs.FileSelector or list of path-likes + Either a Selector object or a list of path-like objects. + format : FileFormat + Currently only ParquetFileFormat and IpcFileFormat are supported. + options : FileSystemFactoryOptions, optional + Various flags influencing the discovery of filesystem paths. + """ + + def __init__( + self, + filesystem: SupportedFileSystem, + paths_or_selector: FileSelector, + format: FileFormat, + options: FileSystemFactoryOptions | None = None, + ) -> None: ... + +class UnionDatasetFactory(DatasetFactory): + """ + Provides a way to inspect/discover a Dataset's expected schema before + materialization. + + Parameters + ---------- + factories : list of DatasetFactory + """ + def __init__(self, factories: list[DatasetFactory]) -> None: ... + +_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) + +class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): + """An iterator over a sequence of record batches.""" + def __iter__(self) -> Self: ... + def __next__(self) -> _RecordBatchT: ... + +class TaggedRecordBatch(NamedTuple): + """ + A combination of a record batch and the fragment it came from. + + Parameters + ---------- + record_batch : RecordBatch + The record batch. + fragment : Fragment + Fragment of the record batch. + """ + + record_batch: lib.RecordBatch + fragment: Fragment + +class TaggedRecordBatchIterator(lib._Weakrefable): + """An iterator over a sequence of record batches with fragments.""" + def __iter__(self) -> Self: ... + def __next__(self) -> TaggedRecordBatch: ... + +class Scanner(lib._Weakrefable): + """A materialized scan operation with context and options bound. + + A scanner is the class that glues the scan tasks, data fragments and data + sources together. + """ + @staticmethod + def from_dataset( + dataset: Dataset, + *, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create Scanner from Dataset, + + Parameters + ---------- + dataset : Dataset + Dataset to scan. + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @staticmethod + def from_fragment( + fragment: Fragment, + *, + schema: lib.Schema | None = None, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create Scanner from Fragment, + + Parameters + ---------- + fragment : Fragment + fragment to scan. + schema : Schema, optional + The schema of the fragment. + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). 
+ + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @overload + @staticmethod + def from_batches( + source: Iterator[lib.RecordBatch], + *, + schema: lib.Schema, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + @overload + @staticmethod + def from_batches( + source: RecordBatchReader, + *, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + @staticmethod + def from_batches(*args, **kwargs): + """ + Create a Scanner from an iterator of batches. + + This creates a scanner which can be used only once. It is + intended to support writing a dataset (which takes a scanner) + from a source which can be read only once (e.g. a + RecordBatchReader or generator). + + Parameters + ---------- + source : Iterator or Arrow-compatible stream object + The iterator of Batches. This can be a pyarrow RecordBatchReader, + any object that implements the Arrow PyCapsule Protocol for + streams, or an actual Python iterator of RecordBatches. + schema : Schema + The schema of the batches (required when passing a Python + iterator). + columns : list[str] or dict[str, Expression], default None + The columns to project. 
This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @property + def dataset_schema(self) -> lib.Schema: + """The schema with which batches will be read from fragments.""" + @property + def projected_schema(self) -> lib.Schema: + """ + The materialized schema of the data, accounting for projections. + + This is the schema of any data returned from the scanner. + """ + def to_batches(self) -> Iterator[lib.RecordBatch]: + """ + Consume a Scanner in record batches. + + Returns + ------- + record_batches : iterator of RecordBatch + """ + def scan_batches(self) -> TaggedRecordBatchIterator: + """ + Consume a Scanner in record batches with corresponding fragments. + + Returns + ------- + record_batches : iterator of TaggedRecordBatch + """ + def to_table(self) -> lib.Table: + """ + Convert a Scanner into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Returns + ------- + Table + """ + def take(self, indices: Indices) -> lib.Table: + """ + Select rows of data by index. + + Will only consume as many batches of the underlying dataset as + needed. Otherwise, this is equivalent to + ``to_table().take(indices)``. 
+
+        Parameters
+        ----------
+        indices : Array or array-like
+            indices of rows to select in the dataset.
+
+        Returns
+        -------
+        Table
+        """
+    def head(self, num_rows: int) -> lib.Table:
+        """
+        Load the first N rows of the dataset.
+
+        Parameters
+        ----------
+        num_rows : int
+            The number of rows to load.
+
+        Returns
+        -------
+        Table
+        """
+    def count_rows(self) -> int:
+        """
+        Count rows matching the scanner filter.
+
+        Returns
+        -------
+        count : int
+        """
+    def to_reader(self) -> RecordBatchReader:
+        """Consume this scanner as a RecordBatchReader.
+
+        Returns
+        -------
+        RecordBatchReader
+        """
+
+def get_partition_keys(partition_expression: Expression) -> dict[str, Any]:
+    """
+    Extract partition keys (equality constraints between a field and a scalar)
+    from an expression as a dict mapping the field's name to its value.
+
+    NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning
+    will be conjunctions of equality conditions and are accessible through this
+    function. Other subexpressions will be ignored.
+
+    Parameters
+    ----------
+    partition_expression : pyarrow.dataset.Expression
+
+    Returns
+    -------
+    dict
+
+    Examples
+    --------
+
+    For example, an expression of
+    <pyarrow.dataset.Expression ((part == A:string) and (year == 2016:int32))>
+    is converted to {'part': 'A', 'year': 2016}
+    """
+
+class WrittenFile(lib._Weakrefable):
+    """
+    Metadata information about files written as
+    part of a dataset write operation
+
+    Parameters
+    ----------
+    path : str
+        Path to the file.
+    metadata : pyarrow.parquet.FileMetaData, optional
+        For Parquet files, the Parquet file metadata.
+    size : int
+        The size of the file in bytes.
+    """
+    def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ...
+
+def _filesystemdataset_write(
+    data: Scanner,
+    base_dir: StrPath,
+    basename_template: str,
+    filesystem: SupportedFileSystem,
+    partitioning: Partitioning,
+    file_options: FileWriteOptions,
+    max_partitions: int,
+    file_visitor: Callable[[str], None],
+    existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"],
+    max_open_files: int,
+    max_rows_per_file: int,
+    min_rows_per_group: int,
+    max_rows_per_group: int,
+    create_dir: bool,
+): ...
+
+class _ScanNodeOptions(ExecNodeOptions):
+    def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ...
+
+class ScanNodeOptions(_ScanNodeOptions):
+    """
+    A Source node which yields batches from a Dataset scan.
+
+    This is the option class for the "scan" node factory.
+
+    This node is capable of applying pushdown projections or filters
+    to the file readers which reduce the amount of data that needs to
+    be read (if supported by the file format). But note that this does not
+    construct associated filter or project nodes to perform the final
+    filtering or projection. Rather, you may supply the same filter
+    expression or projection to the scan node that you also supply
+    to the filter or project node.
+
+    Yielded batches will be augmented with fragment/batch indices when
+    implicit_ordering=True to enable stable ordering for simple ExecPlans.
+
+    Parameters
+    ----------
+    dataset : pyarrow.dataset.Dataset
+        The table which acts as the data source.
+    **kwargs : dict, optional
+        Scan options. See `Scanner.from_dataset` for possible arguments.
+    require_sequenced_output : bool, default False
+        Batches are yielded sequentially, like single-threaded
+    implicit_ordering : bool, default False
+        Preserve implicit ordering of data.
+    """
+
+    def __init__(
+        self,
+        dataset: Dataset,
+        require_sequenced_output: bool = False,
+        implicit_ordering: bool = False,
+        **kwargs,
+    ) -> None: ...
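To make the scan API above concrete, here is a minimal usage sketch of the kind of caller code these `_dataset` stubs are meant to type-check. It is illustrative only: the `./data` path, the partition fields, and the column names are assumptions, not part of this patch.

import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.dataset as ds

# Assumes a Hive-style layout, e.g. ./data/year=2009/month=11/part-0.parquet
part = ds.partitioning(
    pa.schema([("year", pa.int16()), ("month", pa.int8())]), flavor="hive"
)
dataset = ds.dataset("./data", format="parquet", partitioning=part)

# Build a Scanner with a projection and a pushed-down filter,
# mirroring the Scanner.from_dataset signature declared above.
scanner = dataset.scanner(
    columns=["year", "month"],
    filter=(pc.field("year") == 2009) & (pc.field("month") == 11),
    use_threads=True,
)

n_rows = scanner.count_rows()  # int
preview = scanner.head(5)      # pyarrow.Table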
diff --git a/python/stubs/_dataset_orc.pyi b/python/stubs/_dataset_orc.pyi new file mode 100644 index 00000000000..9c4ac04198f --- /dev/null +++ b/python/stubs/_dataset_orc.pyi @@ -0,0 +1,6 @@ +from ._dataset import FileFormat + +class OrcFileFormat(FileFormat): + def equals(self, other: OrcFileFormat) -> bool: ... + @property + def default_extname(self): ... diff --git a/python/stubs/_dataset_parquet.pyi b/python/stubs/_dataset_parquet.pyi new file mode 100644 index 00000000000..cbcc17235f1 --- /dev/null +++ b/python/stubs/_dataset_parquet.pyi @@ -0,0 +1,314 @@ +from dataclasses import dataclass +from typing import IO, Any, Iterable, TypedDict + +from _typeshed import StrPath + +from ._compute import Expression +from ._dataset import ( + DatasetFactory, + FileFormat, + FileFragment, + FileWriteOptions, + Fragment, + FragmentScanOptions, + Partitioning, + PartitioningFactory, +) +from ._dataset_parquet_encryption import ParquetDecryptionConfig +from ._fs import SupportedFileSystem +from ._parquet import FileDecryptionProperties, FileMetaData +from .lib import CacheOptions, Schema, _Weakrefable + +parquet_encryption_enabled: bool + +class ParquetFileFormat(FileFormat): + """ + FileFormat for Parquet + + Parameters + ---------- + read_options : ParquetReadOptions + Read options for the file. + default_fragment_scan_options : ParquetFragmentScanOptions + Scan Options for the file. + **kwargs : dict + Additional options for read option or scan option + """ + def __init__( + self, + read_options: ParquetReadOptions | None = None, + default_fragment_scan_options: ParquetFragmentScanOptions | None = None, + **kwargs, + ) -> None: ... + @property + def read_options(self) -> ParquetReadOptions: ... + def make_write_options(self) -> ParquetFileWriteOptions: ... # type: ignore[override] + def equals(self, other: ParquetFileFormat) -> bool: ... + @property + def default_extname(self) -> str: ... + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + row_groups: Iterable[int] | None = None, + *, + file_size: int | None = None, + ) -> Fragment: + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. + row_groups : Iterable, optional + The indices of the row groups to include + file_size : int, optional + The size of the file in bytes. Can improve performance with high-latency filesystems + when file size needs to be known before reading. + + Returns + ------- + fragment : Fragment + The file fragment + """ + +class _NameStats(TypedDict): + min: Any + max: Any + +class RowGroupInfo: + """ + A wrapper class for RowGroup information + + Parameters + ---------- + id : integer + The group ID. + metadata : FileMetaData + The rowgroup metadata. + schema : Schema + Schema of the rows. + """ + + id: int + metadata: FileMetaData + schema: Schema + + def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... 
+    @property
+    def statistics(self) -> dict[str, _NameStats]: ...
+
+class ParquetFileFragment(FileFragment):
+    """A Fragment representing a parquet file."""
+
+    def ensure_complete_metadata(self) -> None: ...
+    @property
+    def row_groups(self) -> list[RowGroupInfo]: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def num_row_groups(self) -> int:
+        """
+        Return the number of row groups viewed by this fragment (not the
+        number of row groups in the origin file).
+        """
+    def split_by_row_group(
+        self, filter: Expression | None = None, schema: Schema | None = None
+    ) -> list[Fragment]:
+        """
+        Split the fragment into multiple fragments.
+
+        Yield a Fragment wrapping each row group in this ParquetFileFragment.
+        Row groups will be excluded whose metadata contradicts the optional
+        filter.
+
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+
+        Returns
+        -------
+        A list of Fragments
+        """
+    def subset(
+        self,
+        filter: Expression | None = None,
+        schema: Schema | None = None,
+        row_group_ids: list[int] | None = None,
+    ) -> ParquetFileFragment:
+        """
+        Create a subset of the fragment (viewing a subset of the row groups).
+
+        Subset can be specified by either a filter predicate (with optional
+        schema) or by a list of row group IDs. Note that when using a filter,
+        the resulting fragment can be empty (viewing no row groups).
+
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+        row_group_ids : list of ints
+            The row group IDs to include in the subset. Can only be specified
+            if `filter` is None.
+
+        Returns
+        -------
+        ParquetFileFragment
+        """
+
+class ParquetReadOptions(_Weakrefable):
+    """
+    Parquet format specific options for reading.
+
+    Parameters
+    ----------
+    dictionary_columns : list of string, default None
+        Names of columns which should be dictionary encoded as
+        they are read
+    coerce_int96_timestamp_unit : str, default None
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be inferred as timestamps
+        in nanoseconds
+    """
+    def __init__(
+        self, dictionary_columns: list[str] | None, coerce_int96_timestamp_unit: str | None = None
+    ) -> None: ...
+    @property
+    def coerce_int96_timestamp_unit(self) -> str: ...
+    @coerce_int96_timestamp_unit.setter
+    def coerce_int96_timestamp_unit(self, unit: str) -> None: ...
+    def equals(self, other: ParquetReadOptions) -> bool: ...
+
+class ParquetFileWriteOptions(FileWriteOptions):
+    def update(self, **kwargs) -> None: ...
+    def _set_properties(self) -> None: ...
+    def _set_arrow_properties(self) -> None: ...
+    def _set_encryption_config(self) -> None: ...
+
+@dataclass(kw_only=True)
+class ParquetFragmentScanOptions(FragmentScanOptions):
+    """
+    Scan-specific options for Parquet fragments.
+
+    Parameters
+    ----------
+    use_buffered_stream : bool, default False
+        Read files through buffered input streams rather than loading entire
+        row groups at once. This may be enabled to reduce memory overhead.
+        Disabled by default.
+ buffer_size : int, default 8192 + Size of buffered stream, if enabled. Default is 8KB. + pre_buffer : bool, default True + If enabled, pre-buffer the raw Parquet data instead of issuing one + read per column chunk. This can improve performance on high-latency + filesystems (e.g. S3, GCS) by coalescing and issuing file reads in + parallel using a background I/O thread pool. + Set to False if you want to prioritize minimal memory usage + over maximum speed. + cache_options : pyarrow.CacheOptions, default None + Cache options used when pre_buffer is enabled. The default values should + be good for most use cases. You may want to adjust these for example if + you have exceptionally high latency to the file system. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None + If not None, use the provided ParquetDecryptionConfig to decrypt the + Parquet file. + decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None + If not None, use the provided FileDecryptionProperties to decrypt encrypted + Parquet file. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file. + """ + + use_buffered_stream: bool = False + buffer_size: int = 8192 + pre_buffer: bool = True + cache_options: CacheOptions | None = None + thrift_string_size_limit: int | None = None + thrift_container_size_limit: int | None = None + decryption_config: ParquetDecryptionConfig | None = None + decryption_properties: FileDecryptionProperties | None = None + page_checksum_verification: bool = False + + def equals(self, other: ParquetFragmentScanOptions) -> bool: ... + +@dataclass +class ParquetFactoryOptions(_Weakrefable): + """ + Influences the discovery of parquet dataset. + + Parameters + ---------- + partition_base_dir : str, optional + For the purposes of applying the partitioning, paths will be + stripped of the partition_base_dir. Files not matching the + partition_base_dir prefix will be skipped for partitioning discovery. + The ignored files will still be part of the Dataset, but will not + have partition information. + partitioning : Partitioning, PartitioningFactory, optional + The partitioning scheme applied to fragments, see ``Partitioning``. + validate_column_chunk_paths : bool, default False + Assert that all ColumnChunk paths are consistent. The parquet spec + allows for ColumnChunk data to be stored in multiple files, but + ParquetDatasetFactory supports only a single file with all ColumnChunk + data. If this flag is set construction of a ParquetDatasetFactory will + raise an error if ColumnChunk data is not resident in a single file. + """ + + partition_base_dir: str | None = None + partitioning: Partitioning | PartitioningFactory | None = None + validate_column_chunk_paths: bool = False + +class ParquetDatasetFactory(DatasetFactory): + """ + Create a ParquetDatasetFactory from a Parquet `_metadata` file. + + Parameters + ---------- + metadata_path : str + Path to the `_metadata` parquet metadata-only file generated with + `pyarrow.parquet.write_metadata`. 
+ filesystem : pyarrow.fs.FileSystem + Filesystem to read the metadata_path from, and subsequent parquet + files. + format : ParquetFileFormat + Parquet format options. + options : ParquetFactoryOptions, optional + Various flags influencing the discovery of filesystem paths. + """ + def __init__( + self, + metadata_path: str, + filesystem: SupportedFileSystem, + format: FileFormat, + options: ParquetFactoryOptions | None = None, + ) -> None: ... diff --git a/python/stubs/_dataset_parquet_encryption.pyi b/python/stubs/_dataset_parquet_encryption.pyi new file mode 100644 index 00000000000..7623275b865 --- /dev/null +++ b/python/stubs/_dataset_parquet_encryption.pyi @@ -0,0 +1,85 @@ +from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions +from ._parquet import FileDecryptionProperties +from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig +from .lib import _Weakrefable + +class ParquetEncryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level encryption + within the Parquet framework. + + The ParquetEncryptionConfig class serves as a bridge for passing encryption-related + parameters to the appropriate components within the Parquet library. It maintains references + to objects that define the encryption strategy, Key Management Service (KMS) configuration, + and specific encryption configurations for Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object. The `CryptoFactory` is responsible for + creating cryptographic components, such as encryptors and decryptors. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object. This object holds the configuration + parameters necessary for connecting to a Key Management Service (KMS). + encryption_config : pyarrow.parquet.encryption.EncryptionConfiguration + Shared pointer to an `EncryptionConfiguration` object. This object defines specific + encryption settings for Parquet data, including the keys assigned to different columns. + + Raises + ------ + ValueError + Raised if `encryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +class ParquetDecryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level decryption + within the Parquet framework. + + ParquetDecryptionConfig is designed to pass decryption-related parameters to + the appropriate decryption components within the Parquet library. It holds references to + objects that define the decryption strategy, Key Management Service (KMS) configuration, + and specific decryption configurations for reading encrypted Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object, pivotal in creating cryptographic + components for the decryption process. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object, containing parameters necessary + for connecting to a Key Management Service (KMS) during decryption. 
+ decryption_config : pyarrow.parquet.encryption.DecryptionConfiguration + Shared pointer to a `DecryptionConfiguration` object, specifying decryption settings + for reading encrypted Parquet data. + + Raises + ------ + ValueError + Raised if `decryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +def set_encryption_config( + opts: ParquetFileWriteOptions, + config: ParquetEncryptionConfig, +) -> None: ... +def set_decryption_properties( + opts: ParquetFragmentScanOptions, + config: FileDecryptionProperties, +): ... +def set_decryption_config( + opts: ParquetFragmentScanOptions, + config: ParquetDecryptionConfig, +): ... diff --git a/python/stubs/_feather.pyi b/python/stubs/_feather.pyi new file mode 100644 index 00000000000..8bb914ba45d --- /dev/null +++ b/python/stubs/_feather.pyi @@ -0,0 +1,29 @@ +from typing import IO + +from _typeshed import StrPath + +from .lib import Buffer, NativeFile, Table, _Weakrefable + +class FeatherError(Exception): ... + +def write_feather( + table: Table, + dest: StrPath | IO | NativeFile, + compression: str | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: int = 2, +): ... + +class FeatherReader(_Weakrefable): + def __init__( + self, + source: StrPath | IO | NativeFile | Buffer, + use_memory_map: bool, + use_threads: bool, + ) -> None: ... + @property + def version(self) -> str: ... + def read(self) -> Table: ... + def read_indices(self, indices: list[int]) -> Table: ... + def read_names(self, names: list[str]) -> Table: ... diff --git a/python/stubs/_flight.pyi b/python/stubs/_flight.pyi new file mode 100644 index 00000000000..4450c42df49 --- /dev/null +++ b/python/stubs/_flight.pyi @@ -0,0 +1,1380 @@ +import asyncio +import enum +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Generator, Generic, Iterable, Iterator, NamedTuple, TypeVar + +from typing_extensions import deprecated + +from .ipc import _ReadPandasMixin +from .lib import ( + ArrowCancelled, + ArrowException, + ArrowInvalid, + Buffer, + IpcReadOptions, + IpcWriteOptions, + RecordBatch, + RecordBatchReader, + Schema, + Table, + TimestampScalar, + _CRecordBatchWriter, + _Weakrefable, +) + +_T = TypeVar("_T") + +class FlightCallOptions(_Weakrefable): + """RPC-layer options for a Flight call.""" + + def __init__( + self, + timeout: float | None = None, + write_options: IpcWriteOptions | None = None, + headers: list[tuple[str, str]] | None = None, + read_options: IpcReadOptions | None = None, + ) -> None: + """Create call options. + + Parameters + ---------- + timeout : float, None + A timeout for the call, in seconds. None means that the + timeout defaults to an implementation-specific value. + write_options : pyarrow.ipc.IpcWriteOptions, optional + IPC write options. The default options can be controlled + by environment variables (see pyarrow.ipc). + headers : List[Tuple[str, str]], optional + A list of arbitrary headers as key, value tuples + read_options : pyarrow.ipc.IpcReadOptions, optional + Serialization options for reading IPC format. + """ + +class CertKeyPair(NamedTuple): + """A TLS certificate and key for use in Flight.""" + + cert: str + key: str + +class FlightError(Exception): + """ + The base class for Flight-specific errors. 
+ + A server may raise this class or one of its subclasses to provide + a more detailed error to clients. + + Parameters + ---------- + message : str, optional + The error message. + extra_info : bytes, optional + Extra binary error details that were provided by the + server/will be sent to the client. + + Attributes + ---------- + extra_info : bytes + Extra binary error details that were provided by the + server/will be sent to the client. + """ + + extra_info: bytes + +class FlightInternalError(FlightError, ArrowException): + """An error internal to the Flight server occurred.""" + +class FlightTimedOutError(FlightError, ArrowException): + """The Flight RPC call timed out.""" + +class FlightCancelledError(FlightError, ArrowCancelled): + """The operation was cancelled.""" + +class FlightServerError(FlightError, ArrowException): + """A server error occurred.""" + +class FlightUnauthenticatedError(FlightError, ArrowException): + """The client is not authenticated.""" + +class FlightUnauthorizedError(FlightError, ArrowException): + """The client is not authorized to perform the given operation.""" + +class FlightUnavailableError(FlightError, ArrowException): + """The server is not reachable or available.""" + +class FlightWriteSizeExceededError(ArrowInvalid): + """A write operation exceeded the client-configured limit.""" + + limit: int + actual: int + +class Action(_Weakrefable): + """An action executable on a Flight service.""" + + def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: + """Create an action from a type and a buffer. + + Parameters + ---------- + action_type : bytes or str + buf : Buffer or bytes-like object + """ + @property + def type(self) -> str: + """The action type.""" + @property + def body(self) -> Buffer: + """The action body (arguments for the action).""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + +class ActionType(NamedTuple): + """A type of action that is executable on a Flight service.""" + + type: str + description: str + + def make_action(self, buf: Buffer | bytes) -> Action: + """Create an Action with this type. + + Parameters + ---------- + buf : obj + An Arrow buffer or Python bytes or bytes-like object. + """ + +class Result(_Weakrefable): + """A result from executing an Action.""" + def __init__(self, buf: Buffer | bytes) -> None: + """Create a new result. + + Parameters + ---------- + buf : Buffer or bytes-like object + """ + @property + def body(self) -> Buffer: + """Get the Buffer containing the result.""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. 
+ + """ + +class BasicAuth(_Weakrefable): + """A container for basic auth.""" + def __init__( + self, username: str | bytes | None = None, password: str | bytes | None = None + ) -> None: + """Create a new basic auth object. + + Parameters + ---------- + username : string + password : string + """ + @property + def username(self) -> bytes: ... + @property + def password(self) -> bytes: ... + def serialize(self) -> str: ... + @staticmethod + def deserialize(serialized: str | bytes) -> BasicAuth: ... + +class DescriptorType(enum.Enum): + """ + The type of a FlightDescriptor. + + Attributes + ---------- + + UNKNOWN + An unknown descriptor type. + + PATH + A Flight stream represented by a path. + + CMD + A Flight stream represented by an application-defined command. + + """ + + UNKNOWN = 0 + PATH = 1 + CMD = 2 + +class FlightMethod(enum.Enum): + """The implemented methods in Flight.""" + + INVALID = 0 + HANDSHAKE = 1 + LIST_FLIGHTS = 2 + GET_FLIGHT_INFO = 3 + GET_SCHEMA = 4 + DO_GET = 5 + DO_PUT = 6 + DO_ACTION = 7 + LIST_ACTIONS = 8 + DO_EXCHANGE = 9 + +class FlightDescriptor(_Weakrefable): + """A description of a data stream available from a Flight service.""" + @staticmethod + def for_path(*path: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for a resource path.""" + + @staticmethod + def for_command(command: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for an opaque command.""" + @property + def descriptor_type(self) -> DescriptorType: + """Get the type of this descriptor.""" + @property + def path(self) -> list[bytes] | None: + """Get the path for this descriptor.""" + @property + def command(self) -> bytes | None: + """Get the command for this descriptor.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Ticket(_Weakrefable): + """A ticket for requesting a Flight stream.""" + def __init__(self, ticket: str | bytes) -> None: ... + @property + def ticket(self) -> bytes: ... + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Location(_Weakrefable): + """The location of a Flight service.""" + def __init__(self, uri: str | bytes) -> None: ... + @property + def uri(self) -> bytes: ... + def equals(self, other: Location) -> bool: ... + @staticmethod + def for_grpc_tcp(host: str | bytes, port: int) -> Location: + """Create a Location for a TCP-based gRPC service.""" + @staticmethod + def for_grpc_tls(host: str | bytes, port: int) -> Location: + """Create a Location for a TLS-based gRPC service.""" + @staticmethod + def for_grpc_unix(path: str | bytes) -> Location: + """Create a Location for a domain socket-based gRPC service.""" + +class FlightEndpoint(_Weakrefable): + """A Flight stream, along with the ticket and locations to access it.""" + def __init__( + self, + ticket: Ticket | str | bytes, + locations: list[str | Location], + expiration_time: TimestampScalar | None = ..., + app_metadata: bytes | str = ..., + ): + """Create a FlightEndpoint from a ticket and list of locations. + + Parameters + ---------- + ticket : Ticket or bytes + the ticket needed to access this flight + locations : list of string URIs + locations where this flight is available + expiration_time : TimestampScalar, default None + Expiration time of this stream. If present, clients may assume + they can retry DoGet requests. Otherwise, clients should avoid + retrying DoGet requests. 
+ app_metadata : bytes or str, default "" + Application-defined opaque metadata. + + Raises + ------ + ArrowException + If one of the location URIs is not a valid URI. + """ + @property + def ticket(self) -> Ticket: + """Get the ticket in this endpoint.""" + @property + def locations(self) -> list[Location]: + """Get locations where this flight is available.""" + def serialize(self) -> bytes: ... + @property + def expiration_time(self) -> TimestampScalar | None: + """Get the expiration time of this stream. + + If present, clients may assume they can retry DoGet requests. + Otherwise, clients should avoid retrying DoGet requests. + + """ + @property + def app_metadata(self) -> bytes | str: + """Get application-defined opaque metadata.""" + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class SchemaResult(_Weakrefable): + """The serialized schema returned from a GetSchema request.""" + def __init__(self, schema: Schema) -> None: + """Create a SchemaResult from a schema. + + Parameters + ---------- + schema: Schema + the schema of the data in this flight. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightInfo(_Weakrefable): + """A description of a Flight stream.""" + def __init__( + self, + schema: Schema, + descriptor: FlightDescriptor, + endpoints: list[FlightEndpoint], + total_records: int = ..., + total_bytes: int = ..., + ordered: bool = ..., + app_metadata: bytes | str = ..., + ) -> None: + """Create a FlightInfo object from a schema, descriptor, and endpoints. + + Parameters + ---------- + schema : Schema + the schema of the data in this flight. + descriptor : FlightDescriptor + the descriptor for this flight. + endpoints : list of FlightEndpoint + a list of endpoints where this flight is available. + total_records : int, default None + the total records in this flight, -1 or None if unknown. + total_bytes : int, default None + the total bytes in this flight, -1 or None if unknown. + ordered : boolean, default False + Whether endpoints are in the same order as the data. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + @property + def descriptor(self) -> FlightDescriptor: + """The descriptor of the data in this flight.""" + @property + def endpoints(self) -> list[FlightEndpoint]: + """The endpoints where this flight is available.""" + @property + def total_records(self) -> int: + """The total record count of this flight, or -1 if unknown.""" + @property + def total_bytes(self) -> int: + """The size in bytes of the data in this flight, or -1 if unknown.""" + @property + def ordered(self) -> bool: + """Whether endpoints are in the same order as the data.""" + @property + def app_metadata(self) -> bytes | str: + """ + Application-defined opaque metadata. + + There is no inherent or required relationship between this and the + app_metadata fields in the FlightEndpoints or resulting FlightData + messages. Since this metadata is application-defined, a given + application could define there to be a relationship, but there is + none required by the spec. + + """ + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... 
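# A minimal, hypothetical client-side sketch of how the Flight types declared
# above (FlightDescriptor, FlightInfo, FlightEndpoint, Ticket) fit together;
# the gRPC URI and the path are illustrative assumptions, not part of this patch.
import pyarrow.flight as flight

client = flight.connect("grpc://localhost:8815")                   # FlightClient
descriptor = flight.FlightDescriptor.for_path("example.parquet")   # FlightDescriptor
info = client.get_flight_info(descriptor)                          # FlightInfo
for endpoint in info.endpoints:                                    # list[FlightEndpoint]
    reader = client.do_get(endpoint.ticket)                        # FlightStreamReader
    table = reader.read_all()                                      # pyarrow.Table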
+ +class FlightStreamChunk(_Weakrefable): + """A RecordBatch with application metadata on the side.""" + @property + def data(self) -> RecordBatch | None: ... + @property + def app_metadata(self) -> Buffer | None: ... + def __iter__(self): ... + +class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): + """A reader for Flight streams.""" + + # Needs to be separate class so the "real" class can subclass the + # pure-Python mixin class + + def __iter__(self) -> Self: ... + def __next__(self) -> FlightStreamChunk: ... + @property + def schema(self) -> Schema: + """Get the schema for this reader.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + def read_chunk(self) -> FlightStreamChunk: + """Read the next FlightStreamChunk along with any metadata. + + Returns + ------- + chunk : FlightStreamChunk + The next FlightStreamChunk in the stream. + + Raises + ------ + StopIteration + when the stream is finished + """ + def to_reader(self) -> RecordBatchReader: + """Convert this reader into a regular RecordBatchReader. + + This may fail if the schema cannot be read from the remote end. + + Returns + ------- + RecordBatchReader + """ + +class MetadataRecordBatchReader(_MetadataRecordBatchReader): + """The base class for readers for Flight streams. + + See Also + -------- + FlightStreamReader + """ + +class FlightStreamReader(MetadataRecordBatchReader): + """A reader that can also be canceled.""" + def cancel(self) -> None: + """Cancel the read operation.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + +class MetadataRecordBatchWriter(_CRecordBatchWriter): + """A RecordBatchWriter that also allows writing application metadata. + + This class is a context manager; on exit, close() will be called. + """ + + def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: + """Prepare to write data to this stream with the given schema.""" + def write_metadata(self, buf: Buffer) -> None: + """Write Flight metadata by itself.""" + def write_batch(self, batch: RecordBatch) -> None: # type: ignore[override] + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + """ + def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: + """Write a RecordBatch along with Flight metadata. + + Parameters + ---------- + batch : RecordBatch + The next RecordBatch in the stream. + buf : Buffer + Application-specific metadata for the batch as defined by + Flight. 
+ """ + +class FlightStreamWriter(MetadataRecordBatchWriter): + """A writer that also allows closing the write side of a stream.""" + def done_writing(self) -> None: + """Indicate that the client is done writing, but not done reading.""" + +class FlightMetadataReader(_Weakrefable): + """A reader for Flight metadata messages sent during a DoPut.""" + def read(self) -> Buffer | None: + """Read the next metadata message.""" + +class FlightMetadataWriter(_Weakrefable): + """A sender for Flight metadata messages during a DoPut.""" + def write(self, message: Buffer) -> None: + """Write the next metadata message. + + Parameters + ---------- + message : Buffer + """ + +class AsyncioCall(Generic[_T]): + """State for an async RPC using asyncio.""" + + _future: asyncio.Future[_T] + + def as_awaitable(self) -> asyncio.Future[_T]: ... + def wakeup(self, result_or_exception: BaseException | _T) -> None: ... + +class AsyncioFlightClient: + """ + A FlightClient with an asyncio-based async interface. + + This interface is EXPERIMENTAL. + """ + + def __init__(self, client: FlightClient) -> None: ... + async def get_flight_info( + self, + descriptor: FlightDescriptor, + *, + options: FlightCallOptions | None = None, + ): ... + +class FlightClient(_Weakrefable): + """A client to a Flight service. + + Connect to a Flight service on the given host and port. + + Parameters + ---------- + location : str, tuple or Location + Location to connect to. Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. + tls_root_certs : bytes or None + PEM-encoded + cert_chain: bytes or None + Client certificate if using mutual TLS + private_key: bytes or None + Client private key for cert_chain is using mutual TLS + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list optional, default None + A list of ClientMiddlewareFactory instances. + write_size_limit_bytes : int optional, default None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean optional, default False + A flag that indicates that, if the client is connecting + with TLS, that it skips server verification. If this is + enabled, all other TLS settings are overridden. + generic_options : list optional, default None + A list of generic (string, int or string) option tuples passed + to the underlying transport. Effect is implementation + dependent. + """ + def __init__( + self, + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, + ): ... + @property + def supports_async(self) -> bool: ... + def as_async(self) -> AsyncioFlightClient: ... + def wait_for_available(self, timeout: int = 5) -> None: + """Block until the server can be contacted. + + Parameters + ---------- + timeout : int, default 5 + The maximum seconds to wait. + """ + @deprecated( + "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." 
+ ) + @classmethod + def connect( + cls, + location: str | tuple[str, int] | Location, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + disable_server_verification: bool = False, + ) -> FlightClient: + """Connect to a Flight server. + + .. deprecated:: 0.15.0 + Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead. + """ + def authenticate( + self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None + ) -> None: + """Authenticate to the server. + + Parameters + ---------- + auth_handler : ClientAuthHandler + The authentication mechanism to use. + options : FlightCallOptions + Options for this call. + """ + def authenticate_basic_token( + self, username: str, password: str, options: FlightCallOptions | None = None + ) -> tuple[str, str]: + """Authenticate to the server with HTTP basic authentication. + + Parameters + ---------- + username : string + Username to authenticate with + password : string + Password to authenticate with + options : FlightCallOptions + Options for this call + + Returns + ------- + tuple : Tuple[str, str] + A tuple representing the FlightCallOptions authorization + header entry of a bearer token. + """ + def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: + """List the actions available on a service.""" + def do_action( + self, action: Action, options: FlightCallOptions | None = None + ) -> Iterator[Result]: + """ + Execute an action on a service. + + Parameters + ---------- + action : str, tuple, or Action + Can be action type name (no body), type and body, or any Action + object + options : FlightCallOptions + RPC options + + Returns + ------- + results : iterator of Result values + """ + def list_flights( + self, criteria: str | None = None, options: FlightCallOptions | None = None + ) -> Generator[FlightInfo, None, None]: + """List the flights available on a service.""" + def get_flight_info( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> FlightInfo: + """Request information about an available flight.""" + def get_schema( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> Schema: + """Request schema for an available flight.""" + def do_get( + self, ticket: Ticket, options: FlightCallOptions | None = None + ) -> FlightStreamReader: + """Request the data for a flight. + + Returns + ------- + reader : FlightStreamReader + """ + def do_put( + self, + descriptor: FlightDescriptor, + schema: Schema, + options: FlightCallOptions | None = None, + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Upload data to a flight. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightMetadataReader + """ + def do_exchange( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Start a bidirectional data exchange with a server. + + Parameters + ---------- + descriptor : FlightDescriptor + A descriptor for the flight. + options : FlightCallOptions + RPC options. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightStreamReader + """ + def close(self) -> None: + """Close the client and disconnect.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... + +class FlightDataStream(_Weakrefable): + """ + Abstract base class for Flight data streams. 
+ + See Also + -------- + RecordBatchStream + GeneratorStream + """ + +class RecordBatchStream(FlightDataStream): + """A Flight data stream backed by RecordBatches. + + The remainder of this DoGet request will be handled in C++, + without having to acquire the GIL. + + """ + def __init__( + self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None + ) -> None: + """Create a RecordBatchStream from a data source. + + Parameters + ---------- + data_source : RecordBatchReader or Table + The data to stream to the client. + options : pyarrow.ipc.IpcWriteOptions, optional + Optional IPC options to control how to write the data. + """ + +class GeneratorStream(FlightDataStream): + """A Flight data stream backed by a Python generator.""" + def __init__( + self, + schema: Schema, + generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], + options: IpcWriteOptions | None = None, + ) -> None: + """Create a GeneratorStream from a Python generator. + + Parameters + ---------- + schema : Schema + The schema for the data to be returned. + + generator : iterator or iterable + The generator should yield other FlightDataStream objects, + Tables, RecordBatches, or RecordBatchReaders. + + options : pyarrow.ipc.IpcWriteOptions, optional + """ + +class ServerCallContext(_Weakrefable): + """Per-call state/context.""" + def peer_identity(self) -> bytes: + """Get the identity of the authenticated peer. + + May be the empty string. + """ + def peer(self) -> str: + """Get the address of the peer.""" + # Set safe=True as gRPC on Windows sometimes gives garbage bytes + def is_cancelled(self) -> bool: + """Check if the current RPC call has been canceled by the client.""" + def add_header(self, key: str, value: str) -> None: + """Add a response header.""" + def add_trailer(self, key: str, value: str) -> None: + """Add a response trailer.""" + def get_middleware(self, key: str) -> ServerMiddleware | None: + """ + Get a middleware instance by key. + + Returns None if the middleware was not found. + """ + +class ServerAuthReader(_Weakrefable): + """A reader for messages from the client during an auth handshake.""" + def read(self) -> str: ... + +class ServerAuthSender(_Weakrefable): + """A writer for messages to the client during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ClientAuthReader(_Weakrefable): + """A reader for messages from the server during an auth handshake.""" + def read(self) -> str: ... + +class ClientAuthSender(_Weakrefable): + """A writer for messages to the server during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ServerAuthHandler(_Weakrefable): + """Authentication middleware for a server. + + To implement an authentication mechanism, subclass this class and + override its methods. + + """ + def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): + """Conduct the handshake with the client. + + May raise an error if the client cannot authenticate. + + Parameters + ---------- + outgoing : ServerAuthSender + A channel to send messages to the client. + incoming : ServerAuthReader + A channel to read messages from the client. + """ + def is_valid(self, token: str) -> bool: + """Validate a client token, returning their identity. + + May return an empty string (if the auth mechanism does not + name the peer) or raise an exception (if the token is + invalid). + + Parameters + ---------- + token : bytes + The authentication token from the client. 
+ + """ + +class ClientAuthHandler(_Weakrefable): + """Authentication plugin for a client.""" + def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): + """Conduct the handshake with the server. + + Parameters + ---------- + outgoing : ClientAuthSender + A channel to send messages to the server. + incoming : ClientAuthReader + A channel to read messages from the server. + """ + def get_token(self) -> str: + """Get the auth token for a call.""" + +class CallInfo(NamedTuple): + """Information about a particular RPC for Flight middleware.""" + + method: FlightMethod + +class ClientMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + client are accessible from the middleware itself. + + """ + def start_call(self, info: CallInfo) -> ClientMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe and must not raise exceptions. + + Parameters + ---------- + info : CallInfo + Information about the call. + + Returns + ------- + instance : ClientMiddleware + An instance of ClientMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + """ + +class ClientMiddleware(_Weakrefable): + """Client-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. + + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the request, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + + def received_headers(self, headers: dict[str, list[str] | list[bytes]]): + """A callback when headers are received. + + The default implementation does nothing. + + Parameters + ---------- + headers : dict + A dictionary of headers from the server. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). + + """ + + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + The default implementation does nothing. + + Parameters + ---------- + exception : ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class ServerMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + middleware are accessible from the method itself. + + """ + + def start_call( + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> ServerMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe. + + Parameters + ---------- + info : CallInfo + Information about the call. + headers : dict + A dictionary of headers from the client. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). 
+ + Returns + ------- + instance : ServerMiddleware + An instance of ServerMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + Raises + ------ + exception : pyarrow.ArrowException + If an exception is raised, the call will be rejected with + the given error. + + """ + +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): + """A factory for tracing middleware instances. + + This enables OpenTelemetry support in Arrow (if Arrow was compiled + with OpenTelemetry support enabled). A new span will be started on + each RPC call. The TracingServerMiddleware instance can then be + retrieved within an RPC handler to get the propagated context, + which can be used to start a new span on the Python side. + + Because the Python/C++ OpenTelemetry libraries do not + interoperate, spans on the C++ side are not directly visible to + the Python side and vice versa. + + """ + +class ServerMiddleware(_Weakrefable): + """Server-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. + + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the response, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + Parameters + ---------- + exception : pyarrow.ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class TracingServerMiddleware(ServerMiddleware): + trace_context: dict + def __init__(self, trace_context: dict) -> None: ... + +class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory): + """Wrapper to bundle server middleware into a single C++ one.""" + + def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ... + def start_call( # type: ignore[override] + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> _ServerMiddlewareFactoryWrapper | None: ... + +class _ServerMiddlewareWrapper(ServerMiddleware): + def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ... + def send_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ... + def call_completed(self, exception: ArrowException) -> None: ... + +class _FlightServerFinalizer(_Weakrefable): + """ + A finalizer that shuts down the server on destruction. + + See ARROW-16597. If the server is still active at interpreter + exit, the process may segfault. + """ + + def finalize(self) -> None: ... + +class FlightServerBase(_Weakrefable): + """A Flight service definition. + + To start the server, create an instance of this class with an + appropriate location. The server will be running as soon as the + instance is created; it is not required to call :meth:`serve`. + + Override methods to define your Flight service. + + Parameters + ---------- + location : str, tuple or Location optional, default None + Location to serve on. Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. 
+ If None is passed then the server will be started on localhost with a + system provided random port. + auth_handler : ServerAuthHandler optional, default None + An authentication mechanism to use. May be None. + tls_certificates : list optional, default None + A list of (certificate, key) pairs. + verify_client : boolean optional, default False + If True, then enable mutual TLS: require the client to present + a client certificate, and validate the certificate. + root_certificates : bytes optional, default None + If enabling mutual TLS, this specifies the PEM-encoded root + certificate used to validate client certificates. + middleware : dict optional, default None + A dictionary of :class:`ServerMiddlewareFactory` instances. The + string keys can be used to retrieve the middleware instance within + RPC handlers (see :meth:`ServerCallContext.get_middleware`). + + """ + def __init__( + self, + location: str | tuple[str, int] | Location | None = None, + auth_handler: ServerAuthHandler | None = None, + tls_certificates: list[tuple[str, str]] | None = None, + verify_client: bool = False, + root_certificates: str | None = None, + middleware: dict[str, ServerMiddlewareFactory] | None = None, + ): ... + @property + def port(self) -> int: + """ + Get the port that this server is listening on. + + Returns a non-positive value if the operation is invalid + (e.g. init() was not called or server is listening on a domain + socket). + """ + def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: + """List flights available on this service. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + criteria : bytes + Filter criteria provided by the client. + + Returns + ------- + iterator of FlightInfo + + """ + def get_flight_info( + self, context: ServerCallContext, descriptor: FlightDescriptor + ) -> FlightInfo: + """Get information about a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + FlightInfo + + """ + def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: + """Get the schema of a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + Schema + + """ + def do_put( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: FlightMetadataWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : FlightMetadataWriter + A writer to send responses to the client. 
+ + """ + def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + ticket : Ticket + The ticket for the flight. + + Returns + ------- + FlightDataStream + A stream of data to send back to the client. + + """ + def do_exchange( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: MetadataRecordBatchWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : MetadataRecordBatchWriter + A writer to send responses to the client. + + """ + def list_actions(self, context: ServerCallContext) -> Iterable[Action]: + """List custom actions available on this server. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + + Returns + ------- + iterator of ActionType or tuple + + """ + def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: + """Execute a custom action. + + This method should return an iterator, or it should be a + generator. Applications should override this method to + implement their own behavior. The default method raises a + NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + action : Action + The action to execute. + + Returns + ------- + iterator of bytes + + """ + def serve(self) -> None: + """Block until the server shuts down. + + This method only returns if shutdown() is called or a signal is + received. + """ + def run(self) -> None: + """Block until the server shuts down. + + .. deprecated:: 0.15.0 + Use the ``FlightServer.serve`` method instead + """ + def shutdown(self) -> None: + """Shut down the server, blocking until current requests finish. + + Do not call this directly from the implementation of a Flight + method, as then the server will block forever waiting for that + request to finish. Instead, call this method from a background + thread. + + This method should only be called once. + """ + def wait(self) -> None: + """Block until server is terminated with shutdown.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback): ... + +def connect( + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, +) -> FlightClient: + """ + Connect to a Flight server. + + Parameters + ---------- + location : str, tuple, or Location + Location to connect to. 
Either a URI like "grpc://localhost:port", + a tuple of (host, port), or a Location instance. + tls_root_certs : bytes or None + PEM-encoded. + cert_chain: str or None + If provided, enables TLS mutual authentication. + private_key: str or None + If provided, enables TLS mutual authentication. + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list or None + A list of ClientMiddlewareFactory instances to apply. + write_size_limit_bytes : int or None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean or None + Disable verifying the server when using TLS. + Insecure, use with caution. + generic_options : list or None + A list of generic (string, int or string) options to pass to + the underlying transport. + + Returns + ------- + client : FlightClient + """ diff --git a/python/stubs/_fs.pyi b/python/stubs/_fs.pyi new file mode 100644 index 00000000000..7670ef5230d --- /dev/null +++ b/python/stubs/_fs.pyi @@ -0,0 +1,1005 @@ +import datetime as dt +import enum +import sys + +from abc import ABC, abstractmethod + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Union, overload + +from fsspec import AbstractFileSystem # type: ignore[import-untyped] + +from .lib import NativeFile, _Weakrefable + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + +class FileType(enum.IntFlag): + NotFound = enum.auto() + Unknown = enum.auto() + File = enum.auto() + Directory = enum.auto() + +class FileInfo(_Weakrefable): + """ + FileSystem entry info. + + Parameters + ---------- + path : str + The full path to the filesystem entry. + type : FileType + The type of the filesystem entry. + mtime : datetime or float, default None + If given, the modification time of the filesystem entry. + If a float is given, it is the number of seconds since the + Unix epoch. + mtime_ns : int, default None + If given, the modification time of the filesystem entry, + in nanoseconds since the Unix epoch. + `mtime` and `mtime_ns` are mutually exclusive. + size : int, default None + If given, the filesystem entry size in bytes. This should only + be given if `type` is `FileType.File`. + + Examples + -------- + Generate a file: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> path_fs = local_path + "/pyarrow-fs-example.dat" + >>> with local.open_output_stream(path_fs) as stream: + ... 
stream.write(b"data") + 4 + + Get FileInfo object using ``get_file_info()``: + + >>> file_info = local.get_file_info(path_fs) + >>> file_info + + + Inspect FileInfo attributes: + + >>> file_info.type + + + >>> file_info.is_file + True + + >>> file_info.path + '/.../pyarrow-fs-example.dat' + + >>> file_info.base_name + 'pyarrow-fs-example.dat' + + >>> file_info.size + 4 + + >>> file_info.extension + 'dat' + + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + + def __init__( + self, + path: str, + type: FileType = FileType.Unknown, + *, + mtime: dt.datetime | float | None = None, + mtime_ns: int | None = None, + size: int | None = None, + ): ... + @property + def type(self) -> FileType: + """ + Type of the file. + + The returned enum values can be the following: + + - FileType.NotFound: target does not exist + - FileType.Unknown: target exists but its type is unknown (could be a + special file such as a Unix socket or character device, or + Windows NUL / CON / ...) + - FileType.File: target is a regular file + - FileType.Directory: target is a regular directory + + Returns + ------- + type : FileType + """ + @property + def is_file(self) -> bool: ... + @property + def path(self) -> str: + """ + The full file path in the filesystem. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.path + '/.../pyarrow-fs-example.dat' + """ + @property + def base_name(self) -> str: + """ + The file base name. + + Component after the last directory separator. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.base_name + 'pyarrow-fs-example.dat' + """ + @property + def size(self) -> int: + """ + The size in bytes, if available. + + Only regular files are guaranteed to have a size. + + Returns + ------- + size : int or None + """ + @property + def extension(self) -> str: + """ + The file extension. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.extension + 'dat' + """ + @property + def mtime(self) -> dt.datetime | None: + """ + The time of last modification, if available. + + Returns + ------- + mtime : datetime.datetime or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + """ + @property + def mtime_ns(self) -> int | None: + """ + The time of last modification, if available, expressed in nanoseconds + since the Unix epoch. + + Returns + ------- + mtime_ns : int or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + +class FileSelector(_Weakrefable): + """ + File and directory selector. + + It contains a set of options that describes how to search for files and + directories. + + Parameters + ---------- + base_dir : str + The directory in which to select files. Relative paths also work, use + '.' for the current directory and '..' for the parent. + allow_not_found : bool, default False + The behavior if `base_dir` doesn't exist in the filesystem. + If false, an error is returned. + If true, an empty selection is returned. + recursive : bool, default False + Whether to recurse into subdirectories. 
+ + Examples + -------- + List the contents of a directory and subdirectories: + + >>> selector_1 = fs.FileSelector(local_path, recursive=True) + >>> local.get_file_info(selector_1) # doctest: +SKIP + [, + , + ] + + List only the contents of the base directory: + + >>> selector_2 = fs.FileSelector(local_path) + >>> local.get_file_info(selector_2) # doctest: +SKIP + [, + ] + + Return empty selection if the directory doesn't exist: + + >>> selector_not_found = fs.FileSelector( + ... local_path + "/missing", recursive=True, allow_not_found=True + ... ) + >>> local.get_file_info(selector_not_found) + [] + """ + + base_dir: str + allow_not_found: bool + recursive: bool + def __init__(self, base_dir: str, allow_not_found: bool = False, recursive: bool = False): ... + +class FileSystem(_Weakrefable): + """ + Abstract file system API. + """ + + @classmethod + def from_uri(cls, uri: str) -> tuple[Self, str]: + """ + Create a new FileSystem from URI or Path. + + Recognized URI schemes are "file", "mock", "s3fs", "gs", "gcs", "hdfs" and "viewfs". + In addition, the argument can be a pathlib.Path object, or a string + describing an absolute local path. + + Parameters + ---------- + uri : string + URI-based path, for example: file:///some/local/path. + + Returns + ------- + tuple of (FileSystem, str path) + With (filesystem, path) tuple where path is the abstract path + inside the FileSystem instance. + + Examples + -------- + Create a new FileSystem subclass from a URI: + + >>> uri = "file:///{}/pyarrow-fs-example.dat".format(local_path) + >>> local_new, path_new = fs.FileSystem.from_uri(uri) + >>> local_new + >> path_new + '/.../pyarrow-fs-example.dat' + + Or from a s3 bucket: + + >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/") + (, 'usgs-landsat/collection02') + """ + def equals(self, other: FileSystem) -> bool: + """ + Parameters + ---------- + other : pyarrow.fs.FileSystem + + Returns + ------- + bool + """ + @property + def type_name(self) -> str: + """ + The filesystem's type name. + """ + @overload + def get_file_info(self, paths_or_selector: str) -> FileInfo: ... + @overload + def get_file_info(self, paths_or_selector: FileSelector | list[str]) -> list[FileInfo]: ... + def get_file_info(self, paths_or_selector): + """ + Get info for the given files. + + Any symlink is automatically dereferenced, recursively. A non-existing + or unreachable file returns a FileStat object and has a FileType of + value NotFound. An exception indicates a truly exceptional condition + (low-level I/O error, etc.). + + Parameters + ---------- + paths_or_selector : FileSelector, path-like or list of path-likes + Either a selector object, a path-like object or a list of + path-like objects. The selector's base directory will not be + part of the results, even if it exists. If it doesn't exist, + use `allow_not_found`. + + Returns + ------- + FileInfo or list of FileInfo + Single FileInfo object is returned for a single path, otherwise + a list of FileInfo objects is returned. + + Examples + -------- + >>> local + + >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) + + """ + def create_dir(self, path: str, *, recursive: bool = True) -> None: + """ + Create a directory and subdirectories. + + This function succeeds if the directory already exists. + + Parameters + ---------- + path : str + The path of the new directory. + recursive : bool, default True + Create nested directories as well. 
+ """ + def delete_dir(self, path: str) -> None: + """ + Delete a directory and its contents, recursively. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + """ + def delete_dir_contents( + self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False + ) -> None: + """ + Delete a directory's contents, recursively. + + Like delete_dir, but doesn't delete the directory itself. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + accept_root_dir : boolean, default False + Allow deleting the root directory's contents + (if path is empty or "/") + missing_dir_ok : boolean, default False + If False then an error is raised if path does + not exist + """ + def move(self, src: str, dest: str) -> None: + """ + Move / rename a file or directory. + + If the destination exists: + - if it is a non-empty directory, an error is returned + - otherwise, if it has the same type as the source, it is replaced + - otherwise, behavior is unspecified (implementation-dependent). + + Parameters + ---------- + src : str + The path of the file or the directory to be moved. + dest : str + The destination path where the file or directory is moved to. + + Examples + -------- + Create a new folder with a file: + + >>> local.create_dir("/tmp/other_dir") + >>> local.copy_file(path, "/tmp/move_example.dat") + + Move the file: + + >>> local.move("/tmp/move_example.dat", "/tmp/other_dir/move_example_2.dat") + + Inspect the file info: + + >>> local.get_file_info("/tmp/other_dir/move_example_2.dat") + + >>> local.get_file_info("/tmp/move_example.dat") + + + Delete the folder: + >>> local.delete_dir("/tmp/other_dir") + """ + def copy_file(self, src: str, dest: str) -> None: + """ + Copy a file. + + If the destination exists and is a directory, an error is returned. + Otherwise, it is replaced. + + Parameters + ---------- + src : str + The path of the file to be copied from. + dest : str + The destination path where the file is copied to. + + Examples + -------- + >>> local.copy_file(path, local_path + "/pyarrow-fs-example_copy.dat") + + Inspect the file info: + + >>> local.get_file_info(local_path + "/pyarrow-fs-example_copy.dat") + + >>> local.get_file_info(path) + + """ + def delete_file(self, path: str) -> None: + """ + Delete a file. + + Parameters + ---------- + path : str + The path of the file to be deleted. + """ + def open_input_file(self, path: str) -> NativeFile: + """ + Open an input file for random access reading. + + Parameters + ---------- + path : str + The source to open for reading. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_file()`: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data' + """ + def open_input_stream( + self, path: str, compression: str | None = "detect", buffer_size: int | None = None + ) -> NativeFile: + """ + Open an input stream for sequential reading. + + Parameters + ---------- + path : str + The source to open for reading. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly decompression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. 
+ + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_stream()`: + + >>> with local.open_input_stream(path) as f: + ... print(f.readall()) + b'data' + """ + def open_output_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ) -> NativeFile: + """ + Open an output stream for sequential writing. + + If the target already exists, existing data is truncated. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream(path) as stream: + ... stream.write(b"data") + 4 + """ + def open_append_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ): + """ + Open an output stream for appending. + + If the target doesn't exist, a new empty file is created. + + .. note:: + Some filesystem implementations do not support efficient + appending to an existing file, in which case this method will + raise NotImplementedError. + Consider writing to multiple files (using e.g. the dataset layer) + instead. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Append new data to a FileSystem subclass with nonempty file: + + >>> with local.open_append_stream(path) as f: + ... f.write(b"+newly added") + 12 + + Print out the content to the file: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data+newly added' + """ + def normalize_path(self, path: str) -> str: + """ + Normalize filesystem path. + + Parameters + ---------- + path : str + The path to normalize + + Returns + ------- + normalized_path : str + The normalized path + """ + +class LocalFileSystem(FileSystem): + """ + A FileSystem implementation accessing files on the local machine. 
+ + Details such as symlinks are abstracted away (symlinks are always followed, + except when deleting an entry). + + Parameters + ---------- + use_mmap : bool, default False + Whether open_input_stream and open_input_file should return + a mmap'ed file or a regular file. + + Examples + -------- + Create a FileSystem object with LocalFileSystem constructor: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> local + + + and write data on to the file: + + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + >>> with local.open_input_stream("/tmp/local_fs.dat") as stream: + ... print(stream.readall()) + b'data' + + Create a FileSystem object inferred from a URI of the saved file: + + >>> local_new, path = fs.LocalFileSystem().from_uri("/tmp/local_fs.dat") + >>> local_new + >> path + '/tmp/local_fs.dat' + + Check if FileSystems `local` and `local_new` are equal: + + >>> local.equals(local_new) + True + + Compare two different FileSystems: + + >>> local2 = fs.LocalFileSystem(use_mmap=True) + >>> local.equals(local2) + False + + Copy a file and print out the data: + + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/local_fs-copy.dat") + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as stream: + ... print(stream.readall()) + b'data' + + Open an output stream for appending, add text and print the new data: + + >>> with local.open_append_stream("/tmp/local_fs-copy.dat") as f: + ... f.write(b"+newly added") + 12 + + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as f: + ... print(f.readall()) + b'data+newly added' + + Create a directory, copy a file into it and then delete the whole directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.delete_dir("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + + Create a directory, copy a file into it and then delete + the content of the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.delete_dir_contents("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + + Create a directory, copy a file into it and then delete + the file from the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.delete_file("/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.get_file_info("/tmp/new_folder") + + + Move the file: + + >>> local.move("/tmp/local_fs-copy.dat", "/tmp/new_folder/local_fs-copy.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs-copy.dat") + + >>> local.get_file_info("/tmp/local_fs-copy.dat") + + + To finish delete the file left: + >>> local.delete_file("/tmp/local_fs.dat") + """ + + def __init__(self, *, use_mmap: bool = False) -> None: ... + +class SubTreeFileSystem(FileSystem): + """ + Delegates to another implementation after prepending a fixed base path. + + This is useful to expose a logical view of a subtree of a filesystem, + for example a directory in a LocalFileSystem. + + Note, that this makes no security guarantee. For example, symlinks may + allow to "escape" the subtree and access other parts of the underlying + filesystem. 
+ + Parameters + ---------- + base_path : str + The root of the subtree. + base_fs : FileSystem + FileSystem object the operations delegated to. + + Examples + -------- + Create a LocalFileSystem instance: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + + Create a directory and a SubTreeFileSystem instance: + + >>> local.create_dir("/tmp/sub_tree") + >>> subtree = fs.SubTreeFileSystem("/tmp/sub_tree", local) + + Write data into the existing file: + + >>> with subtree.open_append_stream("sub_tree_fs.dat") as f: + ... f.write(b"+newly added") + 12 + + Print out the attributes: + + >>> subtree.base_fs + + >>> subtree.base_path + '/tmp/sub_tree/' + + Get info for the given directory or given file: + + >>> subtree.get_file_info("") + + >>> subtree.get_file_info("sub_tree_fs.dat") + + + Delete the file and directory: + + >>> subtree.delete_file("sub_tree_fs.dat") + >>> local.delete_dir("/tmp/sub_tree") + >>> local.delete_file("/tmp/local_fs.dat") + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__(self, base_path: str, base_fs: FileSystem): ... + @property + def base_path(self) -> str: ... + @property + def base_fs(self) -> FileSystem: ... + +class _MockFileSystem(FileSystem): + def __init__(self, current_time: dt.datetime | None = None) -> None: ... + +class PyFileSystem(FileSystem): + """ + A FileSystem with behavior implemented in Python. + + Parameters + ---------- + handler : FileSystemHandler + The handler object implementing custom filesystem behavior. + + Examples + -------- + Create an fsspec-based filesystem object for GitHub: + + >>> from fsspec.implementations import github + >>> gfs = github.GithubFileSystem("apache", "arrow") # doctest: +SKIP + + Get a PyArrow FileSystem object: + + >>> from pyarrow.fs import PyFileSystem, FSSpecHandler + >>> pa_fs = PyFileSystem(FSSpecHandler(gfs)) # doctest: +SKIP + + Use :func:`~pyarrow.fs.FileSystem` functionality ``get_file_info()``: + + >>> pa_fs.get_file_info("README.md") # doctest: +SKIP + + """ + def __init__(self, handler: FileSystemHandler) -> None: ... + @property + def handler(self) -> FileSystemHandler: + """ + The filesystem's underlying handler. + + Returns + ------- + handler : FileSystemHandler + """ + +class FileSystemHandler(ABC): + """ + An abstract class exposing methods to implement PyFileSystem's behavior. + """ + @abstractmethod + def get_type_name(self) -> str: + """ + Implement PyFileSystem.type_name. + """ + @abstractmethod + def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(paths). + + Parameters + ---------- + paths : list of str + paths for which we want to retrieve the info. + """ + @abstractmethod + def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(selector). + + Parameters + ---------- + selector : FileSelector + selector for which we want to retrieve the info. + """ + + @abstractmethod + def create_dir(self, path: str, recursive: bool) -> None: + """ + Implement PyFileSystem.create_dir(...). + + Parameters + ---------- + path : str + path of the directory. + recursive : bool + if the parent directories should be created too. + """ + @abstractmethod + def delete_dir(self, path: str) -> None: + """ + Implement PyFileSystem.delete_dir(...). + + Parameters + ---------- + path : str + path of the directory. 
+ """ + @abstractmethod + def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: + """ + Implement PyFileSystem.delete_dir_contents(...). + + Parameters + ---------- + path : str + path of the directory. + missing_dir_ok : bool + if False an error should be raised if path does not exist + """ + @abstractmethod + def delete_root_dir_contents(self) -> None: + """ + Implement PyFileSystem.delete_dir_contents("/", accept_root_dir=True). + """ + @abstractmethod + def delete_file(self, path: str) -> None: + """ + Implement PyFileSystem.delete_file(...). + + Parameters + ---------- + path : str + path of the file. + """ + @abstractmethod + def move(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.move(...). + + Parameters + ---------- + src : str + path of what should be moved. + dest : str + path of where it should be moved to. + """ + + @abstractmethod + def copy_file(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.copy_file(...). + + Parameters + ---------- + src : str + path of what should be copied. + dest : str + path of where it should be copied to. + """ + @abstractmethod + def open_input_stream(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_input_file(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_file(...). + + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_output_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + + @abstractmethod + def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_append_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + @abstractmethod + def normalize_path(self, path: str) -> str: + """ + Implement PyFileSystem.normalize_path(...). + + Parameters + ---------- + path : str + path of what should be normalized. + """ diff --git a/python/stubs/_gcsfs.pyi b/python/stubs/_gcsfs.pyi new file mode 100644 index 00000000000..4fc7ea68e48 --- /dev/null +++ b/python/stubs/_gcsfs.pyi @@ -0,0 +1,83 @@ +import datetime as dt + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class GcsFileSystem(FileSystem): + """ + Google Cloud Storage (GCS) backed FileSystem implementation + + By default uses the process described in https://google.aip.dev/auth/4110 + to resolve credentials. If not running on Google Cloud Platform (GCP), + this generally requires the environment variable + GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file + containing credentials. + + Note: GCS buckets are special and the operations available on them may be + limited or more expensive than expected compared to local file systems. + + Note: When pickling a GcsFileSystem that uses default credentials, resolution + credentials are not stored in the serialized data. 
Therefore, when unpickling + it is assumed that the necessary credentials are in place for the target + process. + + Parameters + ---------- + anonymous : boolean, default False + Whether to connect anonymously. + If true, will not attempt to look up credentials using standard GCP + configuration methods. + access_token : str, default None + GCP access token. If provided, temporary credentials will be fetched by + assuming this role; also, a `credential_token_expiration` must be + specified as well. + target_service_account : str, default None + An optional service account to try to impersonate when accessing GCS. This + requires the specified credential user or service account to have the necessary + permissions. + credential_token_expiration : datetime, default None + Expiration for credential generated with an access token. Must be specified + if `access_token` is specified. + default_bucket_location : str, default 'US' + GCP region to create buckets in. + scheme : str, default 'https' + GCS connection transport scheme. + endpoint_override : str, default None + Override endpoint with a connect string such as "localhost:9000" + default_metadata : mapping or pyarrow.KeyValueMetadata, default None + Default metadata for `open_output_stream`. This will be ignored if + non-empty metadata is passed to `open_output_stream`. + retry_time_limit : timedelta, default None + Set the maximum amount of time the GCS client will attempt to retry + transient errors. Subsecond granularity is ignored. + project_id : str, default None + The GCP project identifier to use for creating buckets. + If not set, the library uses the GOOGLE_CLOUD_PROJECT environment + variable. Most I/O operations do not need a project id, only applications + that create new buckets need a project id. + """ + + def __init__( + self, + *, + anonymous: bool = False, + access_token: str | None = None, + target_service_account: str | None = None, + credential_token_expiration: dt.datetime | None = None, + default_bucket_location: str = "US", + scheme: str = "https", + endpoint_override: str | None = None, + default_metadata: dict | KeyValueMetadata | None = None, + retry_time_limit: dt.timedelta | None = None, + project_id: str | None = None, + ): ... + @property + def default_bucket_location(self) -> str: + """ + The GCP location this filesystem will write to. + """ + @property + def project_id(self) -> str: + """ + The GCP project id this filesystem will use. + """ diff --git a/python/stubs/_hdfs.pyi b/python/stubs/_hdfs.pyi new file mode 100644 index 00000000000..200f669379b --- /dev/null +++ b/python/stubs/_hdfs.pyi @@ -0,0 +1,75 @@ +from _typeshed import StrPath + +from ._fs import FileSystem + +class HadoopFileSystem(FileSystem): + """ + HDFS backed FileSystem implementation + + Parameters + ---------- + host : str + HDFS host to connect to. Set to "default" for fs.defaultFS from + core-site.xml. + port : int, default 8020 + HDFS port to connect to. Set to 0 for default or logical (HA) nodes. + user : str, default None + Username when connecting to HDFS; None implies login user. + replication : int, default 3 + Number of copies each block will have. + buffer_size : int, default 0 + If 0, no buffering will happen otherwise the size of the temporary read + and write buffer. + default_block_size : int, default None + None means the default configuration for HDFS, a typical block size is + 128 MB. + kerb_ticket : string or path, default None + If not None, the path to the Kerberos ticket cache. 
+ extra_conf : dict, default None + Extra key/value pairs for configuration; will override any + hdfs-site.xml properties. + + Examples + -------- + >>> from pyarrow import fs + >>> hdfs = fs.HadoopFileSystem( + ... host, port, user=user, kerb_ticket=ticket_cache_path + ... ) # doctest: +SKIP + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__( + self, + host: str, + port: int = 8020, + *, + user: str | None = None, + replication: int = 3, + buffer_size: int = 0, + default_block_size: int | None = None, + kerb_ticket: StrPath | None = None, + extra_conf: dict | None = None, + ): ... + @staticmethod + def from_uri(uri: str) -> HadoopFileSystem: # type: ignore[override] + """ + Instantiate HadoopFileSystem object from an URI string. + + The following two calls are equivalent + + * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\ +&replication=1')`` + * ``HadoopFileSystem('localhost', port=8020, user='test', \ +replication=1)`` + + Parameters + ---------- + uri : str + A string URI describing the connection to HDFS. + In order to change the user, replication, buffer_size or + default_block_size pass the values as query parts. + + Returns + ------- + HadoopFileSystem + """ diff --git a/python/stubs/_json.pyi b/python/stubs/_json.pyi new file mode 100644 index 00000000000..43d2ae83cd8 --- /dev/null +++ b/python/stubs/_json.pyi @@ -0,0 +1,169 @@ +from typing import IO, Any, Literal + +from _typeshed import StrPath + +from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable + +class ReadOptions(_Weakrefable): + """ + Options for reading JSON files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual chunks in the Table. + """ + + use_threads: bool + """ + Whether to use multiple threads to accelerate reading. + """ + block_size: int + """ + How much bytes to process at a time from the input stream. + + This will determine multi-threading granularity as well as the size of + individual chunks in the Table. + """ + def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... + def equals(self, other: ReadOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ReadOptions + + Returns + ------- + bool + """ + +class ParseOptions(_Weakrefable): + """ + Options for parsing JSON files. + + Parameters + ---------- + explicit_schema : Schema, optional (default None) + Optional explicit schema (no type inference, ignores other fields). + newlines_in_values : bool, optional (default False) + Whether objects may be printed across multiple lines (for example + pretty printed). If false, input must end with an empty line. + unexpected_field_behavior : str, default "infer" + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + """ + + explicit_schema: Schema + """ + Optional explicit schema (no type inference, ignores other fields) + """ + newlines_in_values: bool + """ + Whether newline characters are allowed in JSON values. + Setting this to True reduces the performance of multi-threaded + JSON reading. 
+ """ + unexpected_field_behavior: Literal["ignore", "error", "infer"] + """ + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + + Set to "infer" by default. + """ + def __init__( + self, + explicit_schema: Schema | None = None, + newlines_in_values: bool | None = None, + unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", + ): ... + def equals(self, other: ParseOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ParseOptions + + Returns + ------- + bool + """ + +class JSONStreamingReader(RecordBatchReader): + """An object that reads record batches incrementally from a JSON file. + + Should not be instantiated directly by user code. + """ + +def read_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> Table: + """ + Read a Table from a stream of JSON data. + + Parameters + ---------- + input_file : str, path or file-like object + The location of JSON data. Currently only the line-delimited JSON + format is supported. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see ReadOptions constructor for defaults). + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see ParseOptions constructor for defaults). + memory_pool : MemoryPool, optional + Pool to allocate Table memory from. + + Returns + ------- + :class:`pyarrow.Table` + Contents of the JSON file as a in-memory table. + """ + +def open_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> JSONStreamingReader: + """ + Open a streaming reader of JSON data. + + Reading using this function is always single-threaded. + + Parameters + ---------- + input_file : string, path or file-like object + The location of JSON data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see pyarrow.json.ReadOptions constructor + for defaults) + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see pyarrow.json.ParseOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from + + Returns + ------- + :class:`pyarrow.json.JSONStreamingReader` + """ diff --git a/python/stubs/_orc.pyi b/python/stubs/_orc.pyi new file mode 100644 index 00000000000..71bf0dde9ba --- /dev/null +++ b/python/stubs/_orc.pyi @@ -0,0 +1,56 @@ +from typing import IO, Literal + +from .lib import ( + Buffer, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +class ORCReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ... + def metadata(self) -> KeyValueMetadata: ... + def schema(self) -> Schema: ... + def nrows(self) -> int: ... + def nstripes(self) -> int: ... + def file_version(self) -> str: ... + def software_version(self) -> str: ... 
+ def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + def compression_size(self) -> int: ... + def row_index_stride(self) -> int: ... + def writer(self) -> str: ... + def writer_version(self) -> str: ... + def nstripe_statistics(self) -> int: ... + def content_length(self) -> int: ... + def stripe_statistics_length(self) -> int: ... + def file_footer_length(self) -> int: ... + def file_postscript_length(self) -> int: ... + def file_length(self) -> int: ... + def serialized_file_tail(self) -> int: ... + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + def read(self, columns: list[str] | None = None) -> Table: ... + +class ORCWriter(_Weakrefable): + def open( + self, + where: str | NativeFile | IO, + *, + file_version: str | None = None, + batch_size: int | None = None, + stripe_size: int | None = None, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] | None = None, + compression_block_size: int | None = None, + compression_strategy: Literal["COMPRESSION", "SPEED"] | None = None, + row_index_stride: int | None = None, + padding_tolerance: float | None = None, + dictionary_key_size_threshold: float | None = None, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float | None = None, + ) -> None: ... + def write(self, table: Table) -> None: ... + def close(self) -> None: ... diff --git a/python/stubs/_parquet.pyi b/python/stubs/_parquet.pyi new file mode 100644 index 00000000000..a9187df0428 --- /dev/null +++ b/python/stubs/_parquet.pyi @@ -0,0 +1,445 @@ +from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict + +from _typeshed import StrPath + +from ._stubs_typing import Order +from .lib import ( + Buffer, + ChunkedArray, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +_PhysicalType: TypeAlias = Literal[ + "BOOLEAN", + "INT32", + "INT64", + "INT96", + "FLOAT", + "DOUBLE", + "BYTE_ARRAY", + "FIXED_LEN_BYTE_ARRAY", + "UNKNOWN", +] +_LogicTypeName: TypeAlias = Literal[ + "UNDEFINED", + "STRING", + "MAP", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME", + "TIMESTAMP", + "INT", + "FLOAT16", + "JSON", + "BSON", + "UUID", + "NONE", + "UNKNOWN", +] +_ConvertedType: TypeAlias = Literal[ + "NONE", + "UTF8", + "MAP", + "MAP_KEY_VALUE", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME_MILLIS", + "TIME_MICROS", + "TIMESTAMP_MILLIS", + "TIMESTAMP_MICROS", + "UINT_8", + "UINT_16", + "UINT_32", + "UINT_64", + "INT_8", + "INT_16", + "INT_32", + "INT_64", + "JSON", + "BSON", + "INTERVAL", + "UNKNOWN", +] +_Encoding: TypeAlias = Literal[ + "PLAIN", + "PLAIN_DICTIONARY", + "RLE", + "BIT_PACKED", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "RLE_DICTIONARY", + "BYTE_STREAM_SPLIT", + "UNKNOWN", +] +_Compression: TypeAlias = Literal[ + "UNCOMPRESSED", + "SNAPPY", + "GZIP", + "LZO", + "BROTLI", + "LZ4", + "ZSTD", + "UNKNOWN", +] + +class _Statistics(TypedDict): + has_min_max: bool + min: Any | None + max: Any | None + null_count: int | None + distinct_count: int | None + num_values: int + physical_type: _PhysicalType + +class Statistics(_Weakrefable): + def to_dict(self) -> _Statistics: ... + def equals(self, other: Statistics) -> bool: ... + @property + def has_min_max(self) -> bool: ... + @property + def hash_null_count(self) -> bool: ... + @property + def has_distinct_count(self) -> bool: ... + @property + def min_raw(self) -> Any | None: ... 
+ @property + def max_raw(self) -> Any | None: ... + @property + def min(self) -> Any | None: ... + @property + def max(self) -> Any | None: ... + @property + def null_count(self) -> int | None: ... + @property + def distinct_count(self) -> int | None: ... + @property + def num_values(self) -> int: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def logical_type(self) -> ParquetLogicalType: ... + @property + def converted_type(self) -> _ConvertedType | None: ... + +class ParquetLogicalType(_Weakrefable): + def to_json(self) -> str: ... + @property + def type(self) -> _LogicTypeName: ... + +class _ColumnChunkMetaData(TypedDict): + file_offset: int + file_path: str | None + physical_type: _PhysicalType + num_values: int + path_in_schema: str + is_stats_set: bool + statistics: Statistics | None + compression: _Compression + encodings: tuple[_Encoding, ...] + has_dictionary_page: bool + dictionary_page_offset: int | None + data_page_offset: int + total_compressed_size: int + total_uncompressed_size: int + +class ColumnChunkMetaData(_Weakrefable): + def to_dict(self) -> _ColumnChunkMetaData: ... + def equals(self, other: ColumnChunkMetaData) -> bool: ... + @property + def file_offset(self) -> int: ... + @property + def file_path(self) -> str | None: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def num_values(self) -> int: ... + @property + def path_in_schema(self) -> str: ... + @property + def is_stats_set(self) -> bool: ... + @property + def statistics(self) -> Statistics | None: ... + @property + def compression(self) -> _Compression: ... + @property + def encodings(self) -> tuple[_Encoding, ...]: ... + @property + def has_dictionary_page(self) -> bool: ... + @property + def dictionary_page_offset(self) -> int | None: ... + @property + def data_page_offset(self) -> int: ... + @property + def has_index_page(self) -> bool: ... + @property + def index_page_offset(self) -> int: ... + @property + def total_compressed_size(self) -> int: ... + @property + def total_uncompressed_size(self) -> int: ... + @property + def has_offset_index(self) -> bool: ... + @property + def has_column_index(self) -> bool: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + +class _SortingColumn(TypedDict): + column_index: int + descending: bool + nulls_first: bool + +class SortingColumn: + def __init__( + self, column_index: int, descending: bool = False, nulls_first: bool = False + ) -> None: ... + @classmethod + def from_ordering( + cls, + schema: Schema, + sort_keys: Sequence[tuple[str, Order]], + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> tuple[SortingColumn, ...]: ... + @staticmethod + def to_ordering( + schema: Schema, sorting_columns: tuple[SortingColumn, ...] + ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ... + def __hash__(self) -> int: ... + @property + def column_index(self) -> int: ... + @property + def descending(self) -> bool: ... + @property + def nulls_first(self) -> bool: ... + def to_dict(self) -> _SortingColumn: ... + +class _RowGroupMetaData(TypedDict): + num_columns: int + num_rows: int + total_byte_size: int + columns: list[ColumnChunkMetaData] + sorting_columns: list[SortingColumn] + +class RowGroupMetaData(_Weakrefable): + def __init__(self, parent: FileMetaData, index: int) -> None: ... + def equals(self, other: RowGroupMetaData) -> bool: ... + def column(self, i: int) -> ColumnChunkMetaData: ... + def to_dict(self) -> _RowGroupMetaData: ... 
+ @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... + @property + def sorting_columns(self) -> list[SortingColumn]: ... + +class _FileMetaData(TypedDict): + created_by: str + num_columns: int + num_rows: int + num_row_groups: int + format_version: str + serialized_size: int + +class FileMetaData(_Weakrefable): + def __hash__(self) -> int: ... + def to_dict(self) -> _FileMetaData: ... + def equals(self, other: FileMetaData) -> bool: ... + @property + def schema(self) -> ParquetSchema: ... + @property + def serialized_size(self) -> int: ... + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def num_row_groups(self) -> int: ... + @property + def format_version(self) -> str: ... + @property + def created_by(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + def row_group(self, i: int) -> RowGroupMetaData: ... + def set_file_path(self, path: str) -> None: ... + def append_row_groups(self, other: FileMetaData) -> None: ... + def write_metadata_file(self, where: StrPath | Buffer | NativeFile | IO) -> None: ... + +class ParquetSchema(_Weakrefable): + def __init__(self, container: FileMetaData) -> None: ... + def __getitem__(self, i: int) -> ColumnChunkMetaData: ... + def __hash__(self) -> int: ... + def __len__(self) -> int: ... + @property + def names(self) -> list[str]: ... + def to_arrow_schema(self) -> Schema: ... + def equals(self, other: ParquetSchema) -> bool: ... + def column(self, i: int) -> ColumnSchema: ... + +class ColumnSchema(_Weakrefable): + def __init__(self, schema: ParquetSchema, index: int) -> None: ... + def equals(self, other: ColumnSchema) -> bool: ... + @property + def name(self) -> str: ... + @property + def path(self) -> str: ... + @property + def max_definition_level(self) -> int: ... + @property + def max_repetition_level(self) -> int: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def logical_type(self) -> ParquetLogicalType: ... + @property + def converted_type(self) -> _ConvertedType | None: ... + @property + def length(self) -> int | None: ... + @property + def precision(self) -> int | None: ... + @property + def scale(self) -> int | None: ... + +class ParquetReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open( + self, + source: StrPath | NativeFile | IO, + *, + use_memory_map: bool = False, + read_dictionary: Iterable[int] | Iterable[str] | None = None, + metadata: FileMetaData | None = None, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + @property + def column_paths(self) -> list[str]: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def schema_arrow(self) -> Schema: ... + @property + def num_row_groups(self) -> int: ... + def set_use_threads(self, use_threads: bool) -> None: ... + def set_batch_size(self, batch_size: int) -> None: ... + def iter_batches( + self, + batch_size: int, + row_groups: list[int], + column_indices: list[int] | None = None, + use_threads: bool = True, + ) -> Iterator[RecordBatch]: ... 
+ def read_row_group( + self, i: int, column_indices: list[int] | None = None, use_threads: bool = True + ) -> Table: ... + def read_row_groups( + self, + row_groups: list[int], + column_indices: list[int] | None = None, + use_threads: bool = True, + ) -> Table: ... + def read_all( + self, column_indices: list[int] | None = None, use_threads: bool = True + ) -> Table: ... + def scan_contents(self, column_indices: list[int] | None = None, batch_size: int = 65536): ... + def column_name_idx(self, column_name: str) -> int: ... + def read_column(self, column_index: int) -> ChunkedArray: ... + def close(self) -> None: ... + @property + def closed(self) -> bool: ... + +class ParquetWriter(_Weakrefable): + def __init__( + self, + where: StrPath | NativeFile | IO, + schema: Schema, + use_dictionary: bool | list[str] | None = None, + compression: _Compression | dict[str, _Compression] | None = None, + version: str | None = None, + write_statistics: bool | list[str] | None = None, + memory_pool: MemoryPool | None = None, + use_deprecated_int96_timestamps: bool = False, + coerce_timestamps: Literal["ms", "us"] | None = None, + data_page_size: int | None = None, + allow_truncated_timestamps: bool = False, + compression_level: int | dict[str, int] | None = None, + use_byte_stream_split: bool | list[str] = False, + column_encoding: _Encoding | dict[str, _Encoding] | None = None, + writer_engine_version: str | None = None, + data_page_version: str | None = None, + use_compliant_nested_type: bool = True, + encryption_properties: FileDecryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: tuple[SortingColumn, ...] | None = None, + store_decimal_as_integer: bool = False, + ): ... + def close(self) -> None: ... + def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... + def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def use_dictionary(self) -> bool | list[str] | None: ... + @property + def use_deprecated_int96_timestamps(self) -> bool: ... + @property + def use_byte_stream_split(self) -> bool | list[str]: ... + @property + def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ... + @property + def coerce_timestamps(self) -> Literal["ms", "us"] | None: ... + @property + def allow_truncated_timestamps(self) -> bool: ... + @property + def compression(self) -> _Compression | dict[str, _Compression] | None: ... + @property + def compression_level(self) -> int | dict[str, int] | None: ... + @property + def data_page_version(self) -> str | None: ... + @property + def use_compliant_nested_type(self) -> bool: ... + @property + def version(self) -> str | None: ... + @property + def write_statistics(self) -> bool | list[str] | None: ... + @property + def writer_engine_version(self) -> str: ... + @property + def row_group_size(self) -> int: ... + @property + def data_page_size(self) -> int: ... + @property + def encryption_properties(self) -> FileDecryptionProperties: ... + @property + def write_batch_size(self) -> int: ... + @property + def dictionary_pagesize_limit(self) -> int: ... + @property + def store_schema(self) -> bool: ... + @property + def store_decimal_as_integer(self) -> bool: ... + +class FileEncryptionProperties: ... +class FileDecryptionProperties: ... 
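The metadata classes stubbed in _parquet.pyi are the objects handed back by the public pyarrow.parquet API. A minimal sketch of walking them, assuming a local file named "data.parquet" (a placeholder name):

    import pyarrow.parquet as pq

    md = pq.read_metadata("data.parquet")   # FileMetaData
    rg = md.row_group(0)                    # RowGroupMetaData
    col = rg.column(0)                      # ColumnChunkMetaData
    stats = col.statistics                  # Statistics or None
    if stats is not None and stats.has_min_max:
        print(stats.min, stats.max, stats.null_count)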
diff --git a/python/stubs/_parquet_encryption.pyi b/python/stubs/_parquet_encryption.pyi new file mode 100644 index 00000000000..c707edb844a --- /dev/null +++ b/python/stubs/_parquet_encryption.pyi @@ -0,0 +1,67 @@ +import datetime as dt + +from typing import Callable + +from ._parquet import FileDecryptionProperties, FileEncryptionProperties +from .lib import _Weakrefable + +class EncryptionConfiguration(_Weakrefable): + footer_key: str + column_keys: dict[str, list[str]] + encryption_algorithm: str + plaintext_footer: bool + double_wrapping: bool + cache_lifetime: dt.timedelta + internal_key_material: bool + data_key_length_bits: int + + def __init__( + self, + footer_key: str, + *, + column_keys: dict[str, str | list[str]] | None = None, + encryption_algorithm: str | None = None, + plaintext_footer: bool | None = None, + double_wrapping: bool | None = None, + cache_lifetime: dt.timedelta | None = None, + internal_key_material: bool | None = None, + data_key_length_bits: int | None = None, + ) -> None: ... + +class DecryptionConfiguration(_Weakrefable): + cache_lifetime: dt.timedelta + def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... + +class KmsConnectionConfig(_Weakrefable): + kms_instance_id: str + kms_instance_url: str + key_access_token: str + custom_kms_conf: dict[str, str] + def __init__( + self, + *, + kms_instance_id: str | None = None, + kms_instance_url: str | None = None, + key_access_token: str | None = None, + custom_kms_conf: dict[str, str] | None = None, + ) -> None: ... + def refresh_key_access_token(self, value: str) -> None: ... + +class KmsClient(_Weakrefable): + def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... + def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... + +class CryptoFactory(_Weakrefable): + def __init__(self, kms_client_factory: Callable[[KmsConnectionConfig], KmsClient]): ... + def file_encryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> FileEncryptionProperties: ... + def file_decryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + decryption_config: DecryptionConfiguration | None = None, + ) -> FileDecryptionProperties: ... + def remove_cache_entries_for_token(self, access_token: str) -> None: ... + def remove_cache_entries_for_all_tokens(self) -> None: ... diff --git a/python/stubs/_s3fs.pyi b/python/stubs/_s3fs.pyi new file mode 100644 index 00000000000..fc13c498bd9 --- /dev/null +++ b/python/stubs/_s3fs.pyi @@ -0,0 +1,74 @@ +import enum + +from typing import Literal, NotRequired, Required, TypedDict + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class _ProxyOptions(TypedDict): + schema: Required[Literal["http", "https"]] + host: Required[str] + port: Required[int] + username: NotRequired[str] + password: NotRequired[str] + +class S3LogLevel(enum.IntEnum): + Off = enum.auto() + Fatal = enum.auto() + Error = enum.auto() + Warn = enum.auto() + Info = enum.auto() + Debug = enum.auto() + Trace = enum.auto() + +Off = S3LogLevel.Off +Fatal = S3LogLevel.Fatal +Error = S3LogLevel.Error +Warn = S3LogLevel.Warn +Info = S3LogLevel.Info +Debug = S3LogLevel.Debug +Trace = S3LogLevel.Trace + +def initialize_s3( + log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1 +) -> None: ... +def ensure_s3_initialized() -> None: ... +def finalize_s3() -> None: ... +def ensure_s3_finalized() -> None: ... +def resolve_s3_region(bucket: str) -> str: ... 
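The module-level S3 helpers declared above are re-exported through pyarrow.fs. A minimal sketch, where "my-bucket" is a placeholder bucket name:

    from pyarrow import fs

    fs.initialize_s3(fs.S3LogLevel.Error)        # optional: tune AWS SDK log verbosity
    region = fs.resolve_s3_region("my-bucket")   # look up the bucket's region
    s3 = fs.S3FileSystem(region=region, anonymous=True)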
+ +class S3RetryStrategy: + max_attempts: int + def __init__(self, max_attempts=3) -> None: ... + +class AwsStandardS3RetryStrategy(S3RetryStrategy): ... +class AwsDefaultS3RetryStrategy(S3RetryStrategy): ... + +class S3FileSystem(FileSystem): + def __init__( + self, + *, + access_key: str | None = None, + secret_key: str | None = None, + session_token: str | None = None, + anonymous: bool = False, + region: str | None = None, + request_timeout: float | None = None, + connect_timeout: float | None = None, + scheme: Literal["http", "https"] = "https", + endpoint_override: str | None = None, + background_writes: bool = True, + default_metadata: dict | KeyValueMetadata | None = None, + role_arn: str | None = None, + session_name: str | None = None, + external_id: str | None = None, + load_frequency: int = 900, + proxy_options: _ProxyOptions | str | None = None, + allow_bucket_creation: bool = False, + allow_bucket_deletion: bool = False, + check_directory_existence_before_creation: bool = False, + retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3), + force_virtual_addressing: bool = False, + ): ... + @property + def region(self) -> str: ... diff --git a/python/stubs/_stubs_typing.pyi b/python/stubs/_stubs_typing.pyi new file mode 100644 index 00000000000..c259513f1ea --- /dev/null +++ b/python/stubs/_stubs_typing.pyi @@ -0,0 +1,80 @@ +import datetime as dt + +from collections.abc import Sequence +from decimal import Decimal +from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from .compute import BooleanArray, IntegerArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +Mask: TypeAlias = Sequence[bool | None] | NDArray[np.bool_] | BooleanArray +Indices: TypeAlias = Sequence[int] | NDArray[np.integer[Any]] | IntegerArray +PyScalar: TypeAlias = ( + bool | int | float | Decimal | str | bytes | dt.date | dt.datetime | dt.time | dt.timedelta +) + +_T = TypeVar("_T") +SingleOrList: TypeAlias = list[_T] | _T + +class SupportEq(Protocol): + def __eq__(self, other) -> bool: ... + +class SupportLt(Protocol): + def __lt__(self, other) -> bool: ... + +class SupportGt(Protocol): + def __gt__(self, other) -> bool: ... + +class SupportLe(Protocol): + def __le__(self, other) -> bool: ... + +class SupportGe(Protocol): + def __ge__(self, other) -> bool: ... + +FilterTuple: TypeAlias = ( + tuple[str, Literal["=", "==", "!="], SupportEq] + | tuple[str, Literal["<"], SupportLt] + | tuple[str, Literal[">"], SupportGt] + | tuple[str, Literal["<="], SupportLe] + | tuple[str, Literal[">="], SupportGe] + | tuple[str, Literal["in", "not in"], Collection] +) + +class Buffer(Protocol): + def __buffer__(self, flags: int, /) -> memoryview: ... + +class SupportPyBuffer(Protocol): + def __buffer__(self, flags: int, /) -> memoryview: ... + +class SupportArrowStream(Protocol): + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + +class SupportArrowArray(Protocol): + def __arrow_c_array__(self, requested_schema=None) -> Any: ... 
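FilterTuple above models the tuple-style row filters accepted by the Parquet and dataset readers. A minimal sketch, with a placeholder file and made-up column names:

    import pyarrow.parquet as pq

    table = pq.read_table(
        "data.parquet",
        filters=[("year", ">=", 2020), ("city", "in", {"Berlin", "Paris"})],
    )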
+ +class SupportArrowDeviceArray(Protocol): + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + +class SupportArrowSchema(Protocol): + def __arrow_c_schema(self) -> Any: ... diff --git a/python/stubs/_substrait.pyi b/python/stubs/_substrait.pyi new file mode 100644 index 00000000000..ff226e9521b --- /dev/null +++ b/python/stubs/_substrait.pyi @@ -0,0 +1,39 @@ +from typing import Any, Callable + +from ._compute import Expression +from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable + +def run_query( + plan: Buffer | int, + *, + table_provider: Callable[[list[str], Schema], Table] | None = None, + use_threads: bool = True, +) -> RecordBatchReader: ... +def _parse_json_plan(plan: bytes) -> Buffer: ... + +class SubstraitSchema: + schema: Schema + expression: Expression + def __init__(self, schema: Schema, expression: Expression) -> None: ... + def to_pysubstrait(self) -> Any: ... + +def serialize_schema(schema: Schema) -> SubstraitSchema: ... +def deserialize_schema(buf: Buffer | bytes) -> Schema: ... +def serialize_expressions( + exprs: list[Expression], + names: list[str], + schema: Schema, + *, + allow_arrow_extensions: bool = False, +) -> Buffer: ... + +class BoundExpressions(_Weakrefable): + @property + def schema(self) -> Schema: ... + @property + def expressions(self) -> dict[str, Expression]: ... + @classmethod + def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ... + +def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ... +def get_supported_functions() -> list[str]: ... diff --git a/python/stubs/acero.pyi b/python/stubs/acero.pyi new file mode 100644 index 00000000000..8a520bdc24a --- /dev/null +++ b/python/stubs/acero.pyi @@ -0,0 +1,85 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Literal + +from . import lib +from .compute import Expression, FunctionOptions + +_StrOrExpr: TypeAlias = str | Expression + +class Declaration(lib._Weakrefable): + def __init__( + self, + factory_name: str, + options: ExecNodeOptions, + inputs: list[Declaration] | None = None, + ) -> None: ... + @classmethod + def from_sequence(cls, decls: list[Declaration]) -> Self: ... + def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... + def to_table(self, use_threads: bool = True) -> lib.Table: ... + +class ExecNodeOptions(lib._Weakrefable): ... + +class TableSourceNodeOptions(ExecNodeOptions): + def __init__(self, table: lib.Table) -> None: ... + +class FilterNodeOptions(ExecNodeOptions): + def __init__(self, filter_expression: Expression) -> None: ... + +class ProjectNodeOptions(ExecNodeOptions): + def __init__(self, expressions: list[Expression], names: list[str] | None = None) -> None: ... + +class AggregateNodeOptions(ExecNodeOptions): + def __init__( + self, + aggregates: list[tuple[list[str], str, FunctionOptions, str]], + keys: list[_StrOrExpr] | None = None, + ) -> None: ... + +class OrderByNodeOptions(ExecNodeOptions): + def __init__( + self, + sort_keys: tuple[tuple[str, Literal["ascending", "descending"]], ...] = (), + *, + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> None: ... 
+ +class HashJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + join_type: Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", + ], + left_keys: _StrOrExpr | list[_StrOrExpr], + right_keys: _StrOrExpr | list[_StrOrExpr], + left_output: list[_StrOrExpr] | None = None, + right_output: list[_StrOrExpr] | None = None, + output_suffix_for_left: str = "", + output_suffix_for_right: str = "", + ) -> None: ... + +class AsofJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + left_on: _StrOrExpr, + left_by: _StrOrExpr | list[_StrOrExpr], + right_on: _StrOrExpr, + right_by: _StrOrExpr | list[_StrOrExpr], + tolerance: int, + ) -> None: ... diff --git a/python/stubs/benchmark.pyi b/python/stubs/benchmark.pyi new file mode 100644 index 00000000000..048973301dc --- /dev/null +++ b/python/stubs/benchmark.pyi @@ -0,0 +1,3 @@ +from pyarrow.lib import benchmark_PandasObjectIsNull + +__all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/stubs/cffi.pyi b/python/stubs/cffi.pyi new file mode 100644 index 00000000000..2ae945c5974 --- /dev/null +++ b/python/stubs/cffi.pyi @@ -0,0 +1,4 @@ +import cffi + +c_source: str +ffi: cffi.FFI diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi new file mode 100644 index 00000000000..8d8fc35b134 --- /dev/null +++ b/python/stubs/compute.pyi @@ -0,0 +1,7779 @@ +# ruff: noqa: I001 +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from collections.abc import Callable + +# Option classes +from pyarrow._compute import ArraySortOptions as ArraySortOptions +from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions +from pyarrow._compute import CastOptions as CastOptions +from pyarrow._compute import CountOptions as CountOptions +from pyarrow._compute import CumulativeOptions as CumulativeOptions +from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions +from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions +from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions +from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions + +# Expressions +from pyarrow._compute import Expression as Expression +from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions +from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions +from pyarrow._compute import FilterOptions as FilterOptions +from pyarrow._compute import Function as Function +from pyarrow._compute import FunctionOptions as FunctionOptions +from pyarrow._compute import FunctionRegistry as FunctionRegistry +from pyarrow._compute import HashAggregateFunction as HashAggregateFunction +from pyarrow._compute import HashAggregateKernel as HashAggregateKernel +from pyarrow._compute import IndexOptions as IndexOptions +from pyarrow._compute import JoinOptions as JoinOptions +from pyarrow._compute import Kernel as Kernel +from pyarrow._compute import ListFlattenOptions as ListFlattenOptions +from pyarrow._compute import ListSliceOptions as ListSliceOptions +from pyarrow._compute import MakeStructOptions as MakeStructOptions +from pyarrow._compute import MapLookupOptions as MapLookupOptions +from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions +from pyarrow._compute import ModeOptions as ModeOptions +from pyarrow._compute import NullOptions as NullOptions +from pyarrow._compute import PadOptions as PadOptions +from pyarrow._compute 
import PairwiseOptions as PairwiseOptions +from pyarrow._compute import PartitionNthOptions as PartitionNthOptions +from pyarrow._compute import PivotWiderOptions as PivotWiderOptions +from pyarrow._compute import QuantileOptions as QuantileOptions +from pyarrow._compute import RandomOptions as RandomOptions +from pyarrow._compute import RankOptions as RankOptions +from pyarrow._compute import RankQuantileOptions as RankQuantileOptions +from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions +from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions +from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions +from pyarrow._compute import RoundOptions as RoundOptions +from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions +from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions +from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions +from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction +from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel +from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions +from pyarrow._compute import ScalarFunction as ScalarFunction +from pyarrow._compute import ScalarKernel as ScalarKernel +from pyarrow._compute import SelectKOptions as SelectKOptions +from pyarrow._compute import SetLookupOptions as SetLookupOptions +from pyarrow._compute import SkewOptions as SkewOptions +from pyarrow._compute import SliceOptions as SliceOptions +from pyarrow._compute import SortOptions as SortOptions +from pyarrow._compute import SplitOptions as SplitOptions +from pyarrow._compute import SplitPatternOptions as SplitPatternOptions +from pyarrow._compute import StrftimeOptions as StrftimeOptions +from pyarrow._compute import StrptimeOptions as StrptimeOptions +from pyarrow._compute import StructFieldOptions as StructFieldOptions +from pyarrow._compute import TakeOptions as TakeOptions +from pyarrow._compute import TDigestOptions as TDigestOptions +from pyarrow._compute import TrimOptions as TrimOptions +from pyarrow._compute import UdfContext as UdfContext +from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import VarianceOptions as VarianceOptions +from pyarrow._compute import VectorFunction as VectorFunction +from pyarrow._compute import VectorKernel as VectorKernel +from pyarrow._compute import WeekOptions as WeekOptions +from pyarrow._compute import WinsorizeOptions as WinsorizeOptions + +# Functions +from pyarrow._compute import call_function as call_function + +# Udf +from pyarrow._compute import call_tabular_function as call_tabular_function +from pyarrow._compute import function_registry as function_registry +from pyarrow._compute import get_function as get_function +from pyarrow._compute import list_functions as list_functions +from pyarrow._compute import register_aggregate_function as register_aggregate_function +from pyarrow._compute import register_scalar_function as register_scalar_function +from pyarrow._compute import register_tabular_function as register_tabular_function +from pyarrow._compute import register_vector_function as register_vector_function + +from pyarrow._compute import _Order, _Placement +from pyarrow._stubs_typing import ArrayLike, ScalarLike +from . import lib + +_P = ParamSpec("_P") +_R = TypeVar("_R") + +def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: + """Reference a column of the dataset. 
+
+    Stores only the field's name. Type and other information is known only when
+    the expression is bound to a dataset having an explicit scheme.
+
+    Nested references are allowed by passing multiple names or a tuple of
+    names. For example ``('foo', 'bar')`` references the field named "bar"
+    inside the field named "foo".
+
+    Parameters
+    ----------
+    *name_or_index : string, multiple strings, tuple or int
+        The name or index of the (possibly nested) field the expression
+        references to.
+
+    Returns
+    -------
+    field_expr : Expression
+        Reference to the given field
+
+    Examples
+    --------
+    >>> import pyarrow.compute as pc
+    >>> pc.field("a")
+
+    >>> pc.field(1)
+
+    >>> pc.field(("a", "b"))
+
+    >>> pc.field("a", "b")
+    """
+
+def scalar(value: bool | int | float | str) -> Expression:
+    """Expression representing a scalar value.
+
+    Creates an Expression object representing a scalar value that can be used
+    in compute expressions and predicates.
+
+    Parameters
+    ----------
+    value : bool, int, float or string
+        Python value of the scalar. This function accepts any value that can be
+        converted to a ``pyarrow.Scalar`` using ``pa.scalar()``.
+
+    Notes
+    -----
+    This function differs from ``pyarrow.scalar()`` in the following way:
+
+    * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents
+      a single value in Arrow's memory model.
+    * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing
+      a scalar value that can be used in compute expressions, predicates, and
+      dataset filtering operations.
+
+    Returns
+    -------
+    scalar_expr : Expression
+        An Expression representing the scalar value
+    """
+
+def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ...
+
+# ============= compute functions =============
+_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType)
+_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar)
+_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray)
+_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray)
+ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]
+ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT
+
+SignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.Int8Type]
+    | lib.Scalar[lib.Int16Type]
+    | lib.Scalar[lib.Int32Type]
+    | lib.Scalar[lib.Int64Type]
+)
+UnsignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.UInt8Type]
+    | lib.Scalar[lib.UInt16Type]
+    | lib.Scalar[lib.UInt32Type]
+    | lib.Scalar[lib.UInt64Type]
+)
+IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar
+FloatScalar: TypeAlias = (
+    lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type]
+)
+DecimalScalar: TypeAlias = (
+    lib.Scalar[lib.Decimal32Type]
+    | lib.Scalar[lib.Decimal64Type]
+    | lib.Scalar[lib.Decimal128Type]
+    | lib.Scalar[lib.Decimal256Type]
+)
+NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar
+NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar
+BinaryScalar: TypeAlias = (
+    lib.Scalar[lib.BinaryType]
+    | lib.Scalar[lib.LargeBinaryType]
+    | lib.Scalar[lib.FixedSizeBinaryType]
+)
+StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType]
+StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar
+_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any]
+_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT]
+ListScalar: TypeAlias
= ( + lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] +) +TemporalScalar: TypeAlias = ( + lib.Date32Scalar + | lib.Date64Scalar + | lib.Time32Scalar[Any] + | lib.Time64Scalar[Any] + | lib.TimestampScalar[Any] + | lib.DurationScalar[Any] + | lib.MonthDayNanoIntervalScalar +) +NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar +NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar + +_NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] +_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) +_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) +NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] +_NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) +NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT] +_NumericOrTemporalArrayT = TypeVar("_NumericOrTemporalArrayT", bound=NumericOrTemporalArray) +BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar] +_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray) +IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar] +_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar) +FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar] +_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray) +_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar) +StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar] +_StringArrayT = TypeVar("_StringArrayT", bound=StringArray) +_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar) +BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar] +_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray) +_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar) +StringOrBinaryArray: TypeAlias = StringArray | BinaryArray +_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray) +_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar) +TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar] +_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray) +_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]] +_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]] +ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]] +# =============================== 1. Aggregation =============================== + +# ========================= 1.1 functions ========================= + +def all( + array: lib.BooleanScalar | BooleanArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: + """ + Test whether all elements in a boolean array evaluate to true. + + Null values are ignored by default. + If the `skip_nulls` option is set to false, then Kleene logic is used. + See "kleene_and" for more details on Kleene logic. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. 
If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +any = _clone_signature(all) +""" +Test whether any element in a boolean array evaluates to true. + +Null values are ignored by default. +If the `skip_nulls` option is set to false, then Kleene logic is used. +See "kleene_or" for more details on Kleene logic. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def approximate_median( + array: NumericScalar | NumericArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Approximate median of a numeric array with T-Digest algorithm. + + Nulls and NaNs are ignored. + A null scalar is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of null / non-null values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + options : pyarrow.compute.CountOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count_distinct( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of unique values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". 
+    options : pyarrow.compute.CountOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def first(
+    array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarT:
+    """
+    Compute the first value in each group.
+
+    Null values are ignored by default.
+    If skip_nulls = false, then this will return the first and last values
+    regardless if it is null
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    skip_nulls : bool, default True
+        Whether to skip (ignore) nulls in the input.
+        If False, any null in the input forces the output to null.
+    min_count : int, default 1
+        Minimum number of non-null values in the input. If the number
+        of non-null values is below `min_count`, the output is null.
+    options : pyarrow.compute.ScalarAggregateOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def first_last(
+    array: lib.Array[Any] | lib.ChunkedArray[Any],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar:
+    """
+    Compute the first and last values of an array.
+
+    Null values are ignored by default.
+    If skip_nulls = false, then this will return the first and last values
+    regardless if it is null
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    skip_nulls : bool, default True
+        Whether to skip (ignore) nulls in the input.
+        If False, any null in the input forces the output to null.
+    min_count : int, default 1
+        Minimum number of non-null values in the input. If the number
+        of non-null values is below `min_count`, the output is null.
+    options : pyarrow.compute.ScalarAggregateOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def index(
+    data: lib.Array[Any] | lib.ChunkedArray[Any],
+    value,
+    start: int | None = None,
+    end: int | None = None,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar:
+    """
+    Find the index of the first occurrence of a given value.
+
+    Parameters
+    ----------
+    data : Array-like
+    value : Scalar-like object
+        The value to search for.
+    start : int, optional
+    end : int, optional
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+
+    Returns
+    -------
+    index : int
+        the index, or -1 if not found
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
+    >>> pc.index(arr, "ipsum")
+
+    >>> pc.index(arr, "ipsum", start=2)
+
+    >>> pc.index(arr, "amet")
+
+    """
+
+last = _clone_signature(first)
+"""
+Compute the last value in each group.
+ +Null values are ignored by default. +If skip_nulls = false, then this will return the first and last values +regardless if it is null + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +max = _clone_signature(first) +""" +Compute the minimum or maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +min = _clone_signature(first) +""" +Compute the minimum or maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +min_max = _clone_signature(first_last) +""" +Compute the minimum and maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def mean( + array: FloatScalar | FloatArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: ... 
+@overload +def mean( + array: lib.NumericArray[lib.Decimal128Scalar] + | lib.ChunkedArray[lib.Decimal128Scalar] + | lib.Decimal128Scalar, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Decimal128Scalar: ... +@overload +def mean( + array: lib.NumericArray[lib.Decimal256Scalar] + | lib.ChunkedArray[lib.Decimal256Scalar] + | lib.Decimal256Scalar, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Decimal256Scalar: ... +def mean(*args, **kwargs): + """ + Compute the mean of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + The result is a double for integer and floating point arguments, + and a decimal with the same bit-width/precision/scale for decimal arguments. + For integers and floats, NaN is returned if min_count = 0 and + there are no values. For decimals, null is returned instead. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def mode( + array: NumericScalar | NumericArray, + /, + n: int = 1, + *, + skip_nulls: bool = True, + min_count: int = 0, + options: ModeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: + """ + Compute the modal (most common) values of a numeric array. + + Compute the n most common values and their respective occurrence counts. + The output has type `struct`, where T is the + input type. + The results are ordered by descending `count` first, and ascending `mode` + when breaking ties. + Nulls are ignored. If there are no non-null values in the array, + an empty array is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + n : int, default 1 + Number of distinct most-common values to return. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ModeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) + >>> modes = pc.mode(arr, 2) + >>> modes[0] + + >>> modes[1] + + """ + +def product( + array: _ScalarT | lib.NumericArray[_ScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarT: + """ + Compute the product of values in a numeric array. 
+ + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def quantile( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + options: QuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Compute an array of quantiles of a numeric array or chunked array. + + By default, 0.5 quantile (median) is returned. + If quantile lies between two data points, an interpolated value is + returned based on selected interpolation method. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.QuantileOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def stddev( + array: NumericScalar | NumericArray, + /, + *, + ddof: float = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the standard deviation of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. + By default (`ddof` = 0), the population standard deviation is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. 
If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def sum( + array: _NumericScalarT | NumericArray[_NumericScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: + """ + Compute the sum of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def tdigest( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + options: TDigestOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Approximate quantiles of a numeric array with T-Digest algorithm. + + By default, 0.5 quantile (median) is returned. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.TDigestOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +def variance( + array: NumericScalar | NumericArray, + /, + *, + ddof: int = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the variance of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. + By default (`ddof` = 0), the population variance is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. 
+ If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def top_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the top-k ordered elements from array- or table-like + data. + + This is a specialization for :func:`select_k_unstable`. Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get top indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array + Indices of the top-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.top_k_unstable(arr, k=3) + + [ + 5, + 4, + 2 + ] + """ + +def bottom_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the bottom-k ordered elements from + array- or table-like data. + + This is a specialization for :func:`select_k_unstable`. Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get bottom indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array of indices + Indices of the bottom-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.bottom_k_unstable(arr, k=3) + + [ + 0, + 1, + 2 + ] + """ + +# ========================= 2. Element-wise (“scalar”) functions ========================= + +# ========================= 2.1 Arithmetic ========================= +@overload +def abs( + x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT: ... +@overload +def abs( + x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationArrayT: ... +@overload +def abs(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def abs(*args, **kwargs): + """ + Calculate the absolute value of the argument element-wise. + + Results will wrap around on integer overflow. + Use function "abs_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + +abs_checked = _clone_signature(abs) +""" +Calculate the absolute value of the argument element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "abs". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def add( + x: _NumericOrTemporalScalarT, + y: _NumericOrTemporalScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT: ... +@overload +def add( + x: _NumericOrTemporalArrayT, + y: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: Expression, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def add( + x: NumericOrTemporalScalar, + y: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: _NumericOrTemporalArrayT, + y: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: NumericOrTemporalScalar, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def add( + x: Expression, y: NumericOrTemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def add(*args, **kwargs): + """ + Add the arguments element-wise. + + Results will wrap around on integer overflow. + Use function "add_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +add_checked = _clone_signature(add) +""" +Add the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "add". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + +""" + +@overload +def divide( + dividend: _NumericOrTemporalScalarT, + divisor: _NumericOrTemporalScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT: ... +@overload +def divide( + dividend: _NumericOrTemporalArrayT, + divisor: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: Expression, + divisor: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def divide( + dividend: NumericOrTemporalScalar, + divisor: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: _NumericOrTemporalArrayT, + divisor: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: NumericOrTemporalScalar, + divisor: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... 
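For reference, a minimal usage sketch of the aggregation kernels stubbed above (`sum`, `mean`, `quantile`, `mode`) and the wrapping vs. checked arithmetic variants; the results noted in the comments are illustrative expectations, not output captured from this patch.

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
pc.sum(arr)       # Int64Scalar 15 -- the sum keeps the input's integer type
pc.mean(arr)      # DoubleScalar 1.875 -- integers are averaged as double
pc.quantile(arr)  # DoubleArray [2.0] -- the 0.5 quantile (median) by default
pc.mode(arr, 2)   # StructArray of (mode, count) pairs, most frequent first

# Arithmetic kernels broadcast scalars against arrays; the *_checked
# variants raise ArrowInvalid on overflow instead of wrapping around.
pc.add(arr, 10)
try:
    pc.add_checked(pa.array([127], type=pa.int8()), 1)
except pa.lib.ArrowInvalid:
    pass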
+@overload +def divide( + dividend: Expression, + divisor: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def divide(*args, **kwargs): + """ + Divide the arguments element-wise. + + Integer division by zero returns an error. However, integer overflow + wraps around, and floating-point division by zero returns an infinite. + Use function "divide_checked" if you want to get an error + in all the aforementioned cases. + + Parameters + ---------- + dividend : Array-like or scalar-like + Argument to compute function. + divisor : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +divide_checked = _clone_signature(divide) +""" +Divide the arguments element-wise. + +An error is returned when trying to divide by zero, or when +integer overflow is encountered. + +Parameters +---------- +dividend : Array-like or scalar-like + Argument to compute function. +divisor : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def exp( + exponent: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatArrayT: ... +@overload +def exp( + exponent: ArrayOrChunkedArray[NonFloatNumericScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... +@overload +def exp( + exponent: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatScalarT: ... +@overload +def exp( + exponent: NonFloatNumericScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.DoubleScalar: ... +@overload +def exp(exponent: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def exp(*args, **kwargs): + """ + Compute Euler's number raised to the power of specified exponent, element-wise. + + If exponent is null the result will be null. + + Parameters + ---------- + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +multiply = _clone_signature(add) +""" +Multiply the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "multiply_checked" if you want overflow +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +multiply_checked = _clone_signature(add) +""" +Multiply the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "multiply". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def negate( + x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT: ... +@overload +def negate( + x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationArrayT: ... 
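A small sketch of the type promotion encoded in the `exp` overloads above and of scalar broadcasting in `multiply`/`divide`; the float32-preserving behaviour is read off the `_FloatArrayT` overload rather than verified here.

import pyarrow as pa
import pyarrow.compute as pc

pc.exp(pa.array([0, 1, 2]))                      # integer input promotes to float64
pc.exp(pa.array([0.5, 1.5], type=pa.float32()))  # float32 input stays float32
pc.multiply(pa.array([2, 3]), 4)                 # scalar broadcast -> [8, 12]
pc.divide(pa.array([7, 9]), 2)                   # integer division -> [3, 4]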
+@overload +def negate(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def negate(*args, **kwargs): + """ + Negate the argument element-wise. + + Results will wrap around on integer overflow. + Use function "negate_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +negate_checked = _clone_signature(negate) +""" +Negate the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "negate". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def power( + base: _NumericScalarT, + exponent: _NumericScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def power( + base: _NumericArrayT, + exponent: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: Expression, + exponent: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def power( + base: _NumericArrayT, + exponent: NumericScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: NumericScalar, + exponent: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: NumericScalar, + exponent: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def power( + base: Expression, + exponent: NumericScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def power(*args, **kwargs): + """ + Raise arguments to power element-wise. + + Integer to negative integer power returns an error. However, integer overflow + wraps around. If either base or exponent is null the result will be null. + + Parameters + ---------- + base : Array-like or scalar-like + Argument to compute function. + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +power_checked = _clone_signature(power) +""" +Raise arguments to power element-wise. + +An error is returned when integer to negative integer power is encountered, +or integer overflow is encountered. + +Parameters +---------- +base : Array-like or scalar-like + Argument to compute function. +exponent : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def sign( + x: NumericOrDurationArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> ( + lib.NumericArray[lib.Int8Scalar] + | lib.NumericArray[lib.FloatScalar] + | lib.NumericArray[lib.DoubleScalar] +): ... +@overload +def sign( + x: NumericOrDurationScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar: ... +@overload +def sign(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... 
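A brief illustrative sketch of `negate`, `power`, and the checked overflow behaviour described above (expected results shown as comments):

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, -2, 3])
pc.negate(arr)    # [-1, 2, -3], same integer type as the input
pc.power(arr, 2)  # [1, 4, 9]

# power_checked raises on integer overflow instead of wrapping around.
try:
    pc.power_checked(pa.array([2], type=pa.int8()), 10)  # 2**10 overflows int8
except pa.lib.ArrowInvalid:
    pass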
+def sign(*args, **kwargs): + """ + Get the signedness of the arguments element-wise. + + Output is any of (-1,1) for nonzero inputs and 0 for zero input. + NaN values return NaN. Integral values return signedness as Int8 and + floating-point values return it with the same type as the input values. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +@overload +def sqrt(x: NumericArray, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray: ... +@overload +def sqrt(x: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatScalar: ... +@overload +def sqrt(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def sqrt(*args, **kwargs): + """ + Takes the square root of arguments element-wise. + + A negative argument returns a NaN. For a variant that returns an + error, use function "sqrt_checked". + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +sqrt_checked = _clone_signature(sqrt) +""" +Takes the square root of arguments element-wise. + +A negative argument returns an error. For a variant that returns a +NaN, use function "sqrt". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +subtract = _clone_signature(add) +""" +Subtract the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "subtract_checked" if you want overflow +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +subtract_checked = _clone_signature(add) +""" +Subtract the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "subtract". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.1 Bit-wise functions ========================= +@overload +def bit_wise_and( + x: _NumericScalarT, y: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT: ... +@overload +def bit_wise_and( + x: _NumericArrayT, + y: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: NumericScalar, y: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: _NumericArrayT, y: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... 
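An illustrative sketch of `sign`, `sqrt`/`sqrt_checked`, and scalar broadcasting in `subtract`, following the return types declared above:

import pyarrow as pa
import pyarrow.compute as pc

pc.sign(pa.array([-5, 0, 7]))  # integer input -> Int8Array [-1, 0, 1]
pc.sqrt(pa.array([1, 4, 9]))   # promoted to double -> [1.0, 2.0, 3.0]
pc.sqrt(pa.array([-1.0]))      # [nan]; sqrt_checked raises instead
pc.subtract(pa.scalar(10), pa.array([1, 2, 3]))  # broadcast -> [9, 8, 7]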
+@overload +def bit_wise_and( + x: Expression, + y: NumericScalar | ArrayOrChunkedArray[NumericScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def bit_wise_and( + x: NumericScalar | ArrayOrChunkedArray[NumericScalar], + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def bit_wise_and(*args, **kwargs): + """ + Bit-wise AND the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def bit_wise_not( + x: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT: ... +@overload +def bit_wise_not( + x: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_not(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def bit_wise_not(*args, **kwargs): + """ + Bit-wise negate the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +bit_wise_or = _clone_signature(bit_wise_and) +""" +Bit-wise OR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +bit_wise_xor = _clone_signature(bit_wise_and) +""" +Bit-wise XOR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +`x` is returned if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +Use function "shift_left_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left_checked = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_left" for a variant that doesn't fail for an invalid shift amount. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. 
+y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +`x` is returned if `y` (the amount to shift by) is: (1) negative or +(2) greater than or equal to the precision of `x`. +Use function "shift_right_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right_checked = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_right" for a variant that doesn't fail for an invalid shift amount + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.2 Rounding functions ========================= +@overload +def ceil(x: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT: ... +@overload +def ceil(x: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatArrayT: ... +@overload +def ceil(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def ceil(*args, **kwargs): + """ + Round up to the nearest integer. + + Compute the smallest integer value not less in magnitude than `x`. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor = _clone_signature(ceil) +""" +Round down to the nearest integer. + +Compute the largest integer value not greater in magnitude than `x`. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def round( + x: _NumericScalarT, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round( + x: _NumericArrayT, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... 
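A short illustrative sketch of the bit-wise, shift, and rounding kernels stubbed above; expected results are noted in the comments.

import pyarrow as pa
import pyarrow.compute as pc

x = pa.array([0b1100, 0b1010])
pc.bit_wise_and(x, 0b0110)          # [4, 2]
pc.shift_left(pa.array([1, 2]), 3)  # [8, 16]

pc.ceil(pa.array([1.2, -1.2]))      # [2.0, -1.0]
pc.floor(pa.array([1.8]))           # [1.0]
pc.round(pa.array([2.5, 3.5]))      # half-to-even tie-breaking -> [2.0, 4.0]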
+@overload +def round( + x: Expression, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round(*args, **kwargs): + """ + Round to a given precision. + + Options are used to control the number of digits and rounding mode. + Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def round_to_multiple( + x: _NumericScalarT, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round_to_multiple( + x: _NumericArrayT, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def round_to_multiple( + x: Expression, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round_to_multiple(*args, **kwargs): + """ + Round to a given multiple. + + Options are used to control the rounding multiple and rounding mode. + Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundToMultipleOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + +@overload +def round_binary( + x: _NumericScalarT, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round_binary( + x: _NumericScalarT, + s: Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[_NumericScalarT]: ... +@overload +def round_binary( + x: _NumericArrayT, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def round_binary( + x: Expression, + s: Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round_binary(*args, **kwargs): + """ + Round to the given precision. + + Options are used to control the rounding mode. + Default behavior is to use the half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + s : Array-like or scalar-like + Argument to compute function. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundBinaryOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +trunc = _clone_signature(ceil) +""" +Compute the integral part. + +Compute the nearest integer not greater in magnitude than `x`. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.3 Logarithmic functions ========================= +@overload +def ln( + x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... +@overload +def ln( + x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def ln(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def ln(*args, **kwargs): + """ + Compute natural logarithm. + + Non-positive values return -inf or NaN. 
Null values return null. + Use function "ln_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ln_checked = _clone_signature(ln) +""" +Compute natural logarithm. + +Non-positive values raise an error. Null values return null. +Use function "ln" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10 = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log10_checked" if you want non-positive values +to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10_checked = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log10" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p = _clone_signature(ln) +""" +Compute natural log of (1+x). + +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p_checked" if you want invalid values to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p_checked = _clone_signature(ln) +""" +Compute natural log of (1+x). + +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p" if you want invalid values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2 = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log2_checked" if you want non-positive values +to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2_checked = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log2" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def logb( + x: FloatScalar, b: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... 
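A small illustrative sketch of the logarithm family above, including the non-checked vs. checked handling of non-positive input:

import math

import pyarrow as pa
import pyarrow.compute as pc

vals = pa.array([1.0, math.e, 100.0])
pc.ln(vals)                  # [0.0, 1.0, 4.605...]
pc.log10(vals)               # [0.0, 0.434..., 2.0]
pc.ln(pa.array([0.0]))       # [-inf]; ln_checked raises for non-positive input
pc.logb(pa.array([8.0]), 2)  # logarithm in base 2 -> [3.0]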
+@overload +def logb( + x: FloatArray, b: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: FloatScalar, + b: FloatArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: FloatArray, + b: FloatScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: Expression | Any, b: Expression | Any, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression | Any: ... +def logb(*args, **kwargs): + """ + Compute base `b` logarithm. + + Values <= 0 return -inf or NaN. Null values return null. + Use function "logb_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + b : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +logb_checked = _clone_signature(logb) +""" +Compute base `b` logarithm. + +Values <= 0 return -inf or NaN. Null values return null. +Use function "logb" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +b : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.4 Trigonometric functions ========================= +acos = _clone_signature(ln) +""" +Compute the inverse cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "acos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +acos_checked = _clone_signature(ln) +""" +Compute the inverse cosine. + +Invalid input values raise an error; +to return NaN instead, see "acos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin = _clone_signature(ln) +""" +Compute the inverse sine. + +NaN is returned for invalid input values; +to raise an error instead, see "asin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin_checked = _clone_signature(ln) +""" +Compute the inverse sine. + +Invalid input values raise an error; +to return NaN instead, see "asin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +atan = _clone_signature(ln) +""" +Compute the inverse tangent of x. + +The return value is in the range [-pi/2, pi/2]; +for a full return range [-pi, pi], see "atan2". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos = _clone_signature(ln) +""" +Compute the cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "cos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos_checked = _clone_signature(ln) +""" +Compute the cosine. + +Infinite values raise an error; +to return NaN instead, see "cos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin = _clone_signature(ln) +""" +Compute the sine. + +NaN is returned for invalid input values; +to raise an error instead, see "sin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin_checked = _clone_signature(ln) +""" +Compute the sine. + +Invalid input values raise an error; +to return NaN instead, see "sin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan = _clone_signature(ln) +""" +Compute the tangent. + +NaN is returned for invalid input values; +to raise an error instead, see "tan_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan_checked = _clone_signature(ln) +""" +Compute the tangent. + +Infinite values raise an error; +to return NaN instead, see "tan". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def atan2( + y: FloatScalar, x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... +@overload +def atan2( + y: FloatArray, x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def atan2( + y: FloatArray, + x: FloatScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def atan2( + y: FloatScalar, + x: FloatArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def atan2( + y: Expression, x: Any, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def atan2( + y: Any, x: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def atan2(*args, **kwargs): + """ + Compute the inverse tangent of y/x. + + The return value is in the range [-pi, pi]. + + Parameters + ---------- + y : Array-like or scalar-like + Argument to compute function. + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + +# ========================= 2.5 Comparisons functions ========================= +@overload +def equal( + x: lib.Scalar, y: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def equal( + x: lib.Scalar, + y: lib.Array | lib.ChunkedArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: lib.Array | lib.ChunkedArray, + y: lib.Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: lib.Array | lib.ChunkedArray, + y: lib.Array | lib.ChunkedArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def equal( + x: lib.Scalar, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def equal( + x: Expression, + y: lib.Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def equal(*args, **kwargs): + """ + Compare values for equality (x == y). + + A null on either side emits a null comparison result. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +greater = _clone_signature(equal) +""" +Compare values for ordered inequality (x > y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +greater_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x >= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less = _clone_signature(equal) +""" +Compare values for ordered inequality (x < y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x <= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +not_equal = _clone_signature(equal) +""" +Compare values for inequality (x != y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def max_element_wise( + *args: ScalarOrArray[_Scalar_CoT], + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _Scalar_CoT: ... +@overload +def max_element_wise( + *args: Expression, + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def max_element_wise(*args, **kwargs): + """ + Find the element-wise maximum value. + + Nulls are ignored (by default) or propagated. + NaN is preferred over null, but not over any valid value. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +min_element_wise = _clone_signature(max_element_wise) +""" +Find the element-wise minimum value. + +Nulls are ignored (by default) or propagated. +NaN is preferred over null, but not over any valid value. + +Parameters +---------- +*args : Array-like or scalar-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.6 Logical functions ========================= +@overload +def and_( + x: lib.BooleanScalar, y: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def and_( + x: BooleanArray, + y: BooleanArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def and_( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: lib.BooleanScalar, + y: BooleanArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def and_( + x: BooleanArray, + y: lib.BooleanScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def and_( + x: lib.BooleanScalar, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: Expression, + y: lib.BooleanScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: ScalarOrArray[lib.BooleanScalar], + y: ScalarOrArray[lib.BooleanScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ScalarOrArray[lib.BooleanScalar]: ... +def and_(*args, **kwargs): + """ + Logical 'and' boolean values. + + When a null is encountered in either input, a null is output. + For a different null behavior, see function "and_kleene". + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. 
+ memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +and_kleene = _clone_signature(and_) +""" +Logical 'and' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and null = null +- null and true = null +- false and null = false +- null and false = false +- null and null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and' false is always false. +For a different null behavior, see function "and". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not = _clone_signature(and_) +""" +Logical 'and not' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "and_not_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not_kleene = _clone_signature(and_) +""" +Logical 'and not' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and not null = null +- null and not false = null +- false and not null = false +- null and not true = false +- null and not null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and not' true is always false, as is false +'and not' an unknown value. +For a different null behavior, see function "and_not". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_ = _clone_signature(and_) +""" +Logical 'or' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "or_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_kleene = _clone_signature(and_) +""" +Logical 'or' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true or null = true +- null or true = true +- false or null = null +- null or false = null +- null or null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'or' true is always true. +For a different null behavior, see function "or". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +xor = _clone_signature(and_) +""" +Logical 'xor' boolean values. + +When a null is encountered in either input, a null is output. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def invert( + x: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def invert( + x: _BooleanArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _BooleanArrayT: ... +@overload +def invert( + x: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def invert(*args, **kwargs): + """ + Invert boolean values. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.10 String predicates ========================= +@overload +def ascii_is_alnum( + strings: StringScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def ascii_is_alnum( + strings: StringArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def ascii_is_alnum( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def ascii_is_alnum(*args, **kwargs): + """ + Classify strings as ASCII alphanumeric. + + For each string in `strings`, emit true iff the string is non-empty + and consists only of alphanumeric ASCII characters. Null strings emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+""" +ascii_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alnum = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphanumeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphanumeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_digit = _clone_signature(ascii_is_alnum) +""" +Classify strings as digits. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of Unicode digits. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_numeric = _clone_signature(ascii_is_alnum) +""" +Classify strings as numeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of numeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +string_is_ascii = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII. + +For each string in `strings`, emit true iff the string consists only +of ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.11 String transforms ========================= +@overload +def ascii_capitalize( + strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT: ... +@overload +def ascii_capitalize( + strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringArrayT: ... +@overload +def ascii_capitalize( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def ascii_capitalize(*args, **kwargs): + """ + Capitalize the first character of ASCII input. 
+
+    For each string in `strings`, return a capitalized version.
+
+    This function assumes the input is fully ASCII. If it may contain
+    non-ASCII characters, use "utf8_capitalize" instead.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+ascii_lower = _clone_signature(ascii_capitalize)
+"""
+Transform ASCII input to lowercase.
+
+For each string in `strings`, return a lowercase version.
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_lower" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+ascii_reverse = _clone_signature(ascii_capitalize)
+"""
+Reverse ASCII input.
+
+For each ASCII string in `strings`, return a reversed version.
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_reverse" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+ascii_swapcase = _clone_signature(ascii_capitalize)
+"""
+Transform ASCII input by inverting casing.
+
+For each string in `strings`, return a string with opposite casing.
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_swapcase" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+ascii_title = _clone_signature(ascii_capitalize)
+"""
+Titlecase each word of ASCII input.
+
+For each string in `strings`, return a titlecased version.
+Each word in the output will start with an uppercase character and its
+remaining characters will be lowercase.
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_title" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+ascii_upper = _clone_signature(ascii_capitalize)
+"""
+Transform ASCII input to uppercase.
+
+For each string in `strings`, return an uppercase version.
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_upper" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+@overload
+def binary_length(
+    strings: lib.BinaryScalar | lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int32Scalar: ...
+@overload
+def binary_length(
+    strings: lib.LargeBinaryScalar | lib.LargeStringScalar,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+@overload
+def binary_length(
+    strings: lib.BinaryArray
+    | lib.StringArray
+    | lib.ChunkedArray[lib.BinaryScalar]
+    | lib.ChunkedArray[lib.StringScalar],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Array: ...
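A short sketch of how the case transforms and the binary_length overloads above are meant to be used (illustrative inputs; the typed return widths assume the stubs mirror the runtime kernels):

import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["Voilà", None])

# ascii_upper only maps the ASCII letter range; utf8_upper applies full
# Unicode case mapping.
pc.utf8_upper(s)   # ["VOILÀ", null]

# binary_length is typed as int32 for string/binary inputs and int64 for
# the large_* variants; lengths are measured in bytes.
pc.binary_length(s)                                 # [6, null] ("à" is two bytes)
pc.binary_length(s.cast(pa.large_string())).type    # int64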
+@overload +def binary_length( + strings: lib.LargeBinaryArray + | lib.LargeStringArray + | lib.ChunkedArray[lib.LargeBinaryScalar] + | lib.ChunkedArray[lib.LargeStringScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def binary_length( + strings: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_length(*args, **kwargs): + """ + Compute string lengths. + + For each string in `strings`, emit its length of bytes. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def binary_repeat( + strings: _StringOrBinaryScalarT, + num_repeats: int, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT: ... +@overload +def binary_repeat( + strings: _StringOrBinaryScalarT, + num_repeats: list[int] | list[int | None], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array[_StringOrBinaryScalarT]: ... +@overload +def binary_repeat( + strings: _StringOrBinaryArrayT, + num_repeats: int | list[int] | list[int | None], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryArrayT: ... +@overload +def binary_repeat( + strings: Expression, + num_repeats: int | list[int] | list[int | None], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_repeat(*args, **kwargs): + """ + Repeat a binary string. + + For each binary string in `strings`, return a replicated version. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + num_repeats : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def binary_replace_slice( + strings: _StringOrBinaryScalarT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT: ... +@overload +def binary_replace_slice( + strings: _StringOrBinaryArrayT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryArrayT: ... +@overload +def binary_replace_slice( + strings: Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_replace_slice(*args, **kwargs): + """ + Replace a slice of a binary string. + + For each string in `strings`, replace a slice of the string defined by `start` + and `stop` indices with the given `replacement`. `start` is inclusive + and `stop` is exclusive, and both are measured in bytes. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + options : pyarrow.compute.ReplaceSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
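A minimal sketch of binary_repeat and binary_replace_slice (invented inputs):

import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["hello", None])

pc.binary_repeat(s, 2)   # ["hellohello", null]
pc.binary_replace_slice(s, start=1, stop=3, replacement="XY")
# ["hXYlo", null] -- the slice offsets are measured in bytes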
+ """ + +@overload +def binary_reverse( + strings: _BinaryScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _BinaryScalarT: ... +@overload +def binary_reverse( + strings: _BinaryArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _BinaryArrayT: ... +@overload +def binary_reverse( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def binary_reverse(*args, **kwargs): + """ + Reverse binary input. + + For each binary string in `strings`, return a reversed version. + + This function reverses the binary data at a byte-level. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def replace_substring( + strings: _StringScalarT, + /, + pattern: str | bytes, + replacement: str | bytes, + *, + max_replacements: int | None = None, + options: ReplaceSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def replace_substring( + strings: _StringArrayT, + /, + pattern: str | bytes, + replacement: str | bytes, + *, + max_replacements: int | None = None, + options: ReplaceSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def replace_substring( + strings: Expression, + /, + pattern: str | bytes, + replacement: str | bytes, + *, + max_replacements: int | None = None, + options: ReplaceSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def replace_substring(*args, **kwargs): + """ + Replace matching non-overlapping substrings with replacement. + + For each string in `strings`, replace non-overlapping substrings that match + the given literal `pattern` with the given `replacement`. + If `max_replacements` is given and not equal to -1, it limits the + maximum amount replacements per input, counted from the left. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + options : pyarrow.compute.ReplaceSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +replace_substring_regex = _clone_signature(replace_substring) +""" +Replace matching non-overlapping substrings with replacement. + +For each string in `strings`, replace non-overlapping substrings that match +the given regular expression `pattern` with the given `replacement`. +If `max_replacements` is given and not equal to -1, it limits the +maximum amount replacements per input, counted from the left. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +replacement : str + What to replace the pattern with. +max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). +options : pyarrow.compute.ReplaceSubstringOptions, optional + Alternative way of passing options. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def utf8_capitalize( + strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT: ... +@overload +def utf8_capitalize( + strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringArrayT: ... +@overload +def utf8_capitalize( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def utf8_capitalize(*args, **kwargs): + """ + Capitalize the first character of input. + + For each string in `strings`, return a capitalized version, + with the first character uppercased and the others lowercased. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def utf8_length( + strings: lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int32Scalar: ... +@overload +def utf8_length( + strings: lib.LargeStringScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def utf8_length( + strings: lib.StringArray | lib.ChunkedArray[lib.StringScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def utf8_length( + strings: lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def utf8_length( + strings: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_length(*args, **kwargs): + """ + Compute UTF8 string lengths. + + For each string in `strings`, emit its length in UTF8 characters. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +utf8_lower = _clone_signature(utf8_capitalize) +""" +Transform input to lowercase. + +For each string in `strings`, return a lowercase version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def utf8_replace_slice( + strings: _StringScalarT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def utf8_replace_slice( + strings: _StringArrayT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def utf8_replace_slice( + strings: Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_replace_slice(*args, **kwargs): + """ + Replace a slice of a string. + + For each string in `strings`, replace a slice of the string defined by `start` + and `stop` indices with the given `replacement`. `start` is inclusive + and `stop` is exclusive, and both are measured in UTF8 characters. + Null values emit null. 
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    start : int
+        Index to start slicing at (inclusive).
+    stop : int
+        Index to stop slicing at (exclusive).
+    replacement : str
+        What to replace the slice with.
+    options : pyarrow.compute.ReplaceSliceOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+utf8_reverse = _clone_signature(utf8_capitalize)
+"""
+Reverse input.
+
+For each string in `strings`, return a reversed version.
+
+This function operates on Unicode codepoints, not grapheme
+clusters. Hence, it will not correctly reverse grapheme clusters
+composed of multiple codepoints.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+utf8_swapcase = _clone_signature(utf8_capitalize)
+"""
+Transform input lowercase characters to uppercase and uppercase characters to lowercase.
+
+For each string in `strings`, return an opposite case version.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+utf8_title = _clone_signature(utf8_capitalize)
+"""
+Titlecase each word of input.
+
+For each string in `strings`, return a titlecased version.
+Each word in the output will start with an uppercase character and its
+remaining characters will be lowercase.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+utf8_upper = _clone_signature(utf8_capitalize)
+"""
+Transform input to uppercase.
+
+For each string in `strings`, return an uppercase version.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+# ========================= 2.12 String padding =========================
+@overload
+def ascii_center(
+    strings: _StringScalarT,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT: ...
+@overload
+def ascii_center(
+    strings: _StringArrayT,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringArrayT: ...
+@overload
+def ascii_center(
+    strings: Expression,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def ascii_center(*args, **kwargs):
+    """
+    Center strings by padding with a given character.
+
+    For each string in `strings`, emit a centered string by padding both sides
+    with the given ASCII character.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    width : int
+        Desired string length.
+    padding : str, default " "
+        What to pad the string with. Should be one byte or codepoint.
+ lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). + options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_lpad = _clone_signature(ascii_center) +""" +Right-align strings by padding with a given character. + +For each string in `strings`, emit a right-aligned string by prepending +the given ASCII character. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_rpad = _clone_signature(ascii_center) +""" +Left-align strings by padding with a given character. + +For each string in `strings`, emit a left-aligned string by appending +the given ASCII character. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_center = _clone_signature(ascii_center) +""" +Center strings by padding with a given character. + +For each string in `strings`, emit a centered string by padding both sides +with the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_lpad = _clone_signature(ascii_center) +""" +Right-align strings by padding with a given character. + +For each string in `strings`, emit a right-aligned string by prepending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. 
Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rpad = _clone_signature(ascii_center) +""" +Left-align strings by padding with a given character. + +For each string in `strings`, emit a left-aligned string by appending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.13 String trimming ========================= +@overload +def ascii_ltrim( + strings: _StringScalarT, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def ascii_ltrim( + strings: _StringArrayT, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def ascii_ltrim( + strings: Expression, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ascii_ltrim(*args, **kwargs): + """ + Trim leading characters. + + For each string in `strings`, remove any leading characters + from the `characters` option (as given in TrimOptions). + Null values emit null. + Both the `strings` and the `characters` are interpreted as + ASCII; to trim non-ASCII characters, use `utf8_ltrim`. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + characters : str + Individual characters to be trimmed from the string. + options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. +Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_rtrim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. 
+ +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. +Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_trim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim = _clone_signature(ascii_ltrim) +""" +Trim leading characters. + +For each string in `strings`, remove any leading characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. + +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def ascii_ltrim_whitespace( + strings: _StringScalarT, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def ascii_ltrim_whitespace( + strings: _StringArrayT, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def ascii_ltrim_whitespace( + strings: Expression, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ascii_ltrim_whitespace(*args, **kwargs): + """ + Trim leading ASCII whitespace characters. + + For each string in `strings`, emit a string with leading ASCII whitespace + characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode + whitespace characters. Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
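As an illustration of the trimming kernels above (a sketch with invented values):

import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["  xyx  ", None])

pc.utf8_trim_whitespace(s)     # ["xyx", null]
pc.ascii_ltrim_whitespace(s)   # ["xyx  ", null] (only leading whitespace removed)
pc.utf8_trim(pa.array(["xyx"]), characters="x")   # ["y"]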
+ """ + +ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with trailing ASCII whitespace +characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading and trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with leading and trailing ASCII +whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading whitespace characters. + +For each string in `strings`, emit a string with leading whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing whitespace characters. + +For each string in `strings`, emit a string with trailing whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading and trailing whitespace characters. + +For each string in `strings`, emit a string with leading and trailing +whitespace characters removed, where whitespace characters are defined +by the Unicode standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.14 String splitting ========================= +@overload +def ascii_split_whitespace( + strings: _StringScalarT, + /, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringScalarT]: ... +@overload +def ascii_split_whitespace( + strings: lib.Array[lib.Scalar[_DataTypeT]], + /, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ... +@overload +def ascii_split_whitespace( + strings: Expression, + /, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... 
+def ascii_split_whitespace(*args, **kwargs): + """ + Split string according to any ASCII whitespace. + + Split each string according any non-zero length sequence of ASCII + whitespace characters. The output for each string input is a list + of strings. + + The maximum number of splits and direction of splitting + (forward, reverse) can optionally be defined in SplitOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + options : pyarrow.compute.SplitOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def split_pattern( + strings: _StringOrBinaryScalarT, + /, + pattern: str, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringOrBinaryScalarT]: ... +@overload +def split_pattern( + strings: lib.Array[lib.Scalar[_DataTypeT]], + /, + pattern: str, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitPatternOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ... +@overload +def split_pattern( + strings: Expression, + /, + pattern: str, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitPatternOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def split_pattern(*args, **kwargs): + """ + Split string according to separator. + + Split each string according to the exact `pattern` defined in + SplitPatternOptions. The output for each string input is a list + of strings. + + The maximum number of splits and direction of splitting + (forward, reverse) can optionally be defined in SplitPatternOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +split_pattern_regex = _clone_signature(split_pattern) +""" +Split string according to regex pattern. + +Split each string according to the regex `pattern` defined in +SplitPatternOptions. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitPatternOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + String pattern to split on. +max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. 
+options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_split_whitespace = _clone_signature(ascii_split_whitespace) +""" +Split string according to any Unicode whitespace. + +Split each string according any non-zero length sequence of Unicode +whitespace characters. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. +options : pyarrow.compute.SplitOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.15 String component extraction ========================= +@overload +def extract_regex( + strings: StringOrBinaryScalar, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: ... +@overload +def extract_regex( + strings: StringOrBinaryArray, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: ... +@overload +def extract_regex( + strings: Expression, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def extract_regex(*args, **kwargs): + """ + Extract substrings captured by a regex pattern. + + For each string in `strings`, match the regular expression and, if + successful, emit a struct with field names and values coming from the + regular expression's named capture groups. If the input is null or the + regular expression fails matching, a null output value is emitted. + + Regular expression matching is done using the Google RE2 library. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Regular expression with named capture fields. + options : pyarrow.compute.ExtractRegexOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.16 String join ========================= +def binary_join( + strings, separator, /, *, memory_pool: lib.MemoryPool | None = None +) -> StringScalar | StringArray: + """ + Join a list of strings together with a separator. + + Concatenate the strings in `list`. The `separator` is inserted + between each given string. + Any null input and any null `list` element emits a null output. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + separator : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
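A hedged sketch of the splitting, extraction and join kernels documented here (inputs invented; extract_regex relies on RE2 named capture groups):

import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["a,b,c", None])

pc.split_pattern(s, pattern=",")                       # [["a", "b", "c"], null]
pc.binary_join(pc.split_pattern(s, pattern=","), "-")  # ["a-b-c", null]
pc.extract_regex(pa.array(["alpha-1"]), pattern=r"(?P<key>\w+)-(?P<num>\d+)")
# -> struct values like {"key": "alpha", "num": "1"}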
+ """ + +@overload +def binary_join_element_wise( + *strings: _StringOrBinaryScalarT, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT: ... +@overload +def binary_join_element_wise( + *strings: _StringOrBinaryArrayT, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryArrayT: ... +@overload +def binary_join_element_wise( + *strings: Expression, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_join_element_wise(*args, **kwargs): + """ + Join string arguments together, with the last argument as separator. + + Concatenate the `strings` except for the last one. The last argument + in `strings` is inserted between each given string. + Any null separator element emits a null output. Null elements either + emit a null (the default), are skipped, or replaced with a given string. + + Parameters + ---------- + *strings : Array-like or scalar-like + Argument to compute function. + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + options : pyarrow.compute.JoinOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.17 String Slicing ========================= +@overload +def binary_slice( + strings: _BinaryScalarT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryScalarT: ... +@overload +def binary_slice( + strings: _BinaryArrayT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryArrayT: ... +@overload +def binary_slice( + strings: Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_slice(*args, **kwargs): + """ + Slice binary string. + + For each binary string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + bytes. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
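A minimal sketch of element-wise joining and byte-based slicing (invented values):

import pyarrow as pa
import pyarrow.compute as pc

pc.binary_join_element_wise(pa.array(["x", None]), pa.array(["y", "z"]), "-")
# ["x-y", null] -- the last argument is the separator

pc.binary_slice(pa.array([b"abcdef"]), start=1, stop=5, step=2)
# [b"bd"] -- offsets and step are in bytes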
+ """ + +@overload +def utf8_slice_codeunits( + strings: _StringScalarT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def utf8_slice_codeunits( + strings: _StringArrayT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def utf8_slice_codeunits( + strings: Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_slice_codeunits(*args, **kwargs): + """ + Slice string. + + For each string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + UTF8 codeunits. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.18 Containment tests ========================= +@overload +def count_substring( + strings: lib.StringScalar | lib.BinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar: ... +@overload +def count_substring( + strings: lib.LargeStringScalar | lib.LargeBinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def count_substring( + strings: lib.StringArray + | lib.BinaryArray + | lib.ChunkedArray[lib.StringScalar] + | lib.ChunkedArray[lib.BinaryScalar], + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def count_substring( + strings: lib.LargeStringArray + | lib.LargeBinaryArray + | lib.ChunkedArray[lib.LargeStringScalar] + | lib.ChunkedArray[lib.LargeBinaryScalar], + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def count_substring( + strings: Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def count_substring(*args, **kwargs): + """ + Count occurrences of substring. + + For each string in `strings`, emit the number of occurrences of the given + literal pattern. + Null inputs emit null. The pattern must be given in MatchSubstringOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. 
+ pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +count_substring_regex = _clone_signature(count_substring) +""" +Count occurrences of substring. + +For each string in `strings`, emit the number of occurrences of the given +regular expression pattern. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def ends_with( + strings: StringScalar | BinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def ends_with( + strings: StringArray | BinaryArray, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def ends_with( + strings: Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ends_with(*args, **kwargs): + """ + Check if strings end with a literal pattern. + + For each string in `strings`, emit true iff it ends with a given pattern. + The pattern must be given in MatchSubstringOptions. + If ignore_case is set, only simple case folding is performed. + + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +find_substring = _clone_signature(count_substring) +""" +Find first occurrence of substring. + +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +find_substring_regex = _clone_signature(count_substring) +""" +Find location of first match of regex pattern. 
+ +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def index_in( + values: lib.Scalar, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar: ... +@overload +def index_in( + values: lib.Array | lib.ChunkedArray, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def index_in( + values: Expression, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def index_in(*args, **kwargs): + """ + Return index of each element in a set of values. + + For each element in `values`, return its index in a given set of + values, or null if it is not found there. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def is_in( + values: lib.Scalar, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def is_in( + values: lib.Array | lib.ChunkedArray, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_in( + values: Expression, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_in(*args, **kwargs): + """ + Find each element in a set of values. + + For each element in `values`, return true if it is found in a given + set of values, false otherwise. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. 
+ value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +match_like = _clone_signature(ends_with) +""" +Match strings against SQL-style LIKE pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. '%' will match any number of characters, '_' will +match exactly one character, and any other character matches itself. +To match a literal '%', '_', or '\', precede the character with a backslash. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring = _clone_signature(ends_with) +""" +Match strings against literal pattern. + +For each string in `strings`, emit true iff it contains a given pattern. +Null inputs emit null. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring_regex = _clone_signature(ends_with) +""" +Match strings against regex pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +starts_with = _clone_signature(ends_with) +""" +Check if strings start with a literal pattern. + +For each string in `strings`, emit true iff it starts with a given pattern. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. 
+options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.19 Categorizations ========================= +@overload +def is_finite( + values: NumericScalar | lib.NullScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_finite( + values: NumericArray | lib.NullArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def is_finite( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def is_finite(*args, **kwargs): + """ + Return true if value is finite. + + For each input value, emit true iff the value is finite + (i.e. neither NaN, inf, nor -inf). + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +is_inf = _clone_signature(is_finite) +""" +Return true if infinity. + +For each input value, emit true iff the value is infinite (inf or -inf). + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +is_nan = _clone_signature(is_finite) +""" +Return true if NaN. + +For each input value, emit true iff the value is NaN. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def is_null( + values: lib.Scalar, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def is_null( + values: lib.Array | lib.ChunkedArray, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_null( + values: Expression, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_null(*args, **kwargs): + """ + Return true if null (and optionally NaN). + + For each input value, emit true iff the value is null. + True may also be emitted for NaN values by setting the `nan_is_null` flag. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + options : pyarrow.compute.NullOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def is_valid( + values: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_valid( + values: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def is_valid( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def is_valid(*args, **kwargs): + """ + Return true if non-null. + + For each input value, emit true iff the value is valid (i.e. non-null). 
+ + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +true_unless_null = _clone_signature(is_valid) +""" +Return true if non-null, else return null. + +For each input value, emit true iff the value +is valid (non-null), otherwise emit null. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.20 Selecting / multiplexing ========================= +def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): + """ + Choose values based on multiple conditions. + + `cond` must be a struct of Boolean values. `cases` can be a mix + of scalar and array arguments (of any type, but all must be the + same type or castable to a common type), with either exactly one + datum per child of `cond`, or one more `cases` than children of + `cond` (in which case we have an "else" value). + + Each row of the output will be the corresponding value of the + first datum in `cases` for which the corresponding child of `cond` + is true, or otherwise the "else" value (if given), or null. + + Essentially, this implements a switch-case or if-else, if-else... statement. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + *cases : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): + """ + Choose values from several arrays. + + For each row, the value of the first argument is used as a 0-based index + into the list of `values` arrays (i.e. index 0 selects the first of the + `values` arrays). The output value is the corresponding value of the + selected argument. + + If an index is null, the output will be null. + + Parameters + ---------- + indices : Array-like or scalar-like + Argument to compute function. + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def coalesce( + *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT: + """ + Select the first non-null value. + + Each row of the output will be the value from the first corresponding input + for which the value is not null. If all inputs are null in a row, the output + will be null. + + Parameters + ---------- + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +fill_null = coalesce +"""Replace each null element in values with a corresponding +element from fill_value. + +If fill_value is scalar-like, then every null element in values +will be replaced with fill_value. If fill_value is array-like, +then the i-th element in values will be replaced with the i-th +element in fill_value. + +The fill_value's type must be the same as that of values, or it +must be able to be implicitly casted to the array's type. + +This is an alias for :func:`coalesce`. 
+ +Parameters +---------- +values : Array, ChunkedArray, or Scalar-like object + Each null element is replaced with the corresponding value + from fill_value. +fill_value : Array, ChunkedArray, or Scalar-like object + If not same type as values, will attempt to cast. + +Returns +------- +result : depends on inputs + Values with all null elements replaced + +Examples +-------- +>>> import pyarrow as pa +>>> arr = pa.array([1, 2, None, 3], type=pa.int8()) +>>> fill_value = pa.scalar(5, type=pa.int8()) +>>> arr.fill_null(fill_value) + +[ + 1, + 2, + 5, + 3 +] +>>> arr = pa.array([1, 2, None, 4, None]) +>>> arr.fill_null(pa.array([10, 20, 30, 40, 50])) + +[ + 1, + 2, + 30, + 4, + 50 +] +""" + +def if_else( + cond: ArrayLike | ScalarLike, + left: ArrayLike | ScalarLike, + right: ArrayLike | ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: + """ + Choose values based on a condition. + + `cond` must be a Boolean scalar/ array. + `left` or `right` must be of the same type scalar/ array. + `null` values in `cond` will be promoted to the output. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + left : Array-like or scalar-like + Argument to compute function. + right : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.21 Structural transforms ========================= + +@overload +def list_value_length( + lists: _ListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def list_value_length( + lists: _LargeListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def list_value_length( + lists: ListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array | lib.Int64Array: ... +@overload +def list_value_length( + lists: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def list_value_length(*args, **kwargs): + """ + Compute list lengths. + + `lists` must have a list-like type. + For each non-null value in `lists`, its length is emitted. + Null values emit a null in the output. + + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def make_struct( + *args: lib.Scalar, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: ... +@overload +def make_struct( + *args: lib.Array | lib.ChunkedArray, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: ... +@overload +def make_struct( + *args: Expression, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... 
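As a quick illustration of why the selection kernels stubbed above distinguish scalar, array, and Expression inputs, here is a minimal usage sketch. It is not part of the stub file or the patch; it only assumes a working pyarrow installation, and the commented outputs reflect the default kernel behavior.

```python
# Minimal sketch of the selection / multiplexing kernels (if_else, fill_null).
import pyarrow as pa
import pyarrow.compute as pc

cond = pa.array([True, False, None, True])
left = pa.array([1, 2, 3, 4])
right = pa.array([10, 20, 30, 40])

# if_else picks from `left` where cond is true and from `right` where it is
# false; nulls in `cond` propagate to the output.
print(pc.if_else(cond, left, right))  # [1, 20, null, 4]

# fill_null (an alias of coalesce) replaces nulls either with a scalar or
# element-wise from another array.
vals = pa.array([1, None, 3, None])
print(pc.fill_null(vals, pa.scalar(0)))  # [1, 0, 3, 0]

# With the overloads above, array inputs are typed as returning Arrays,
# scalar inputs as returning Scalars, and dataset Expressions stay
# Expressions for lazy evaluation.
```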
+def make_struct(*args, **kwargs): + """ + Wrap Arrays into a StructArray. + + Names of the StructArray's fields are + specified through MakeStructOptions. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + options : pyarrow.compute.MakeStructOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.22 Conversions ========================= +@overload +def ceil_temporal( + timestamps: _TemporalScalarT, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalScalarT: ... +@overload +def ceil_temporal( + timestamps: _TemporalArrayT, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalArrayT: ... +@overload +def ceil_temporal( + timestamps: Expression, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ceil_temporal(*args, **kwargs): + """ + Round temporal values up to nearest multiple of specified time unit. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. 
By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. + options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values down to nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". +week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +round_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values to the nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". +week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def cast( + arr: lib.Scalar, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Scalar[_DataTypeT]: ... +@overload +def cast( + arr: lib.Array, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array[lib.Scalar[_DataTypeT]]: ... +@overload +def cast( + arr: lib.ChunkedArray, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... +def cast(*args, **kwargs): + """ + Cast array values to another data type. Can also be invoked as an array + instance method. 
+ + Parameters + ---------- + arr : Array-like + target_type : DataType or str + Type to cast to + safe : bool, default True + Check for overflows or other unsafe conversions + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Examples + -------- + >>> from datetime import datetime + >>> import pyarrow as pa + >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) + >>> arr.type + TimestampType(timestamp[us]) + + You can use ``pyarrow.DataType`` objects to specify the target type: + + >>> cast(arr, pa.timestamp("ms")) + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + + >>> cast(arr, pa.timestamp("ms")).type + TimestampType(timestamp[ms]) + + Alternatively, it is also supported to use the string aliases for these + types: + + >>> arr.cast("timestamp[ms]") + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + >>> arr.cast("timestamp[ms]").type + TimestampType(timestamp[ms]) + + Returns + ------- + casted : Array + The cast result as a new Array + """ + +@overload +def strftime( + timestamps: TemporalScalar, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringScalar: ... +@overload +def strftime( + timestamps: TemporalArray, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringArray: ... +@overload +def strftime( + timestamps: Expression, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def strftime(*args, **kwargs): + """ + Format temporal values according to a format string. + + For each input value, emit a formatted string. + The time format string and locale can be set using StrftimeOptions. + The output precision of the "%S" (seconds) format code depends on + the input time precision: it is an integer for timestamps with + second precision, a real number with the required number of fractional + digits for higher precisions. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database, or if the specified locale + does not exist on this system. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + options : pyarrow.compute.StrftimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def strptime( + strings: StringScalar, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar: ... +@overload +def strptime( + strings: StringArray, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... 
+@overload +def strptime( + strings: Expression, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def strptime(*args, **kwargs): + """ + Parse timestamps. + + For each string in `strings`, parse it as a timestamp. + The timestamp unit and the expected string pattern must be given + in StrptimeOptions. Null inputs emit null. If a non-null string + fails parsing, an error is returned by default. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + options : pyarrow.compute.StrptimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.23 Temporal component extraction ========================= +@overload +def day( + values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar: ... +@overload +def day( + values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Array: ... +@overload +def day(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def day(*args, **kwargs): + """ + Extract day number. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def day_of_week( + values: TemporalScalar, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def day_of_week( + values: TemporalArray, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def day_of_week( + values: Expression, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def day_of_week(*args, **kwargs): + """ + Extract day of the week number. + + By default, the week starts on Monday represented by 0 and ends on Sunday + represented by 6. + `DayOfWeekOptions.week_start` can be used to set another starting day using + the ISO numbering convention (1=start week on Monday, 7=start week on Sunday). + Day numbers can start at 0 or 1 based on `DayOfWeekOptions.count_from_zero`. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. 
+ count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +day_of_year = _clone_signature(day) +""" +Extract day of year number. + +January 1st maps to day number 1, February 1st to 32, etc. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def hour( + values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def hour( + values: lib.TimestampArray[Any] + | lib.Time32Array[Any] + | lib.Time64Array[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] + | lib.ChunkedArray[lib.Time32Scalar[Any]] + | lib.ChunkedArray[lib.Time64Scalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def hour( + values: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def hour(*args, **kwargs): + """ + Extract hour value. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def is_dst( + values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_dst( + values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_dst(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def is_dst(*args, **kwargs): + """ + Extracts if currently observing daylight savings. + + IsDaylightSavings returns true if a timestamp has a daylight saving + offset in the given timezone. + Null values emit null. + An error is returned if the values do not have a defined timezone. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def iso_week( + values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar: ... +@overload +def iso_week( + values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def iso_week( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def iso_week(*args, **kwargs): + """ + Extract ISO week of year number. + + First ISO week has the majority (4 or more) of its days in January. + ISO week starts on Monday. 
The week number starts with 1 and can run + up to 53. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +iso_year = _clone_signature(iso_week) +""" +Extract ISO year number. + +First week of an ISO year has the majority (4 or more) of its days in January. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def is_leap_year( + values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def is_leap_year( + values: lib.TimestampArray + | lib.Date32Array + | lib.Date64Array + | lib.ChunkedArray[lib.TimestampScalar] + | lib.ChunkedArray[lib.Date32Scalar] + | lib.ChunkedArray[lib.Date64Scalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_leap_year( + values: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_leap_year(*args, **kwargs): + """ + Extract if year is a leap year. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +microsecond = _clone_signature(iso_week) +""" +Extract microsecond values. + +Microsecond returns number of microseconds since the last full millisecond. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +millisecond = _clone_signature(iso_week) +""" +Extract millisecond values. + +Millisecond returns number of milliseconds since the last full second. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +minute = _clone_signature(iso_week) +""" +Extract minute values. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +month = _clone_signature(day_of_week) +""" +Extract month number. + +Month is encoded as January=1, December=12. +Null values emit null. 
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+nanosecond = _clone_signature(hour)
+"""
+Extract nanosecond values.
+
+Nanosecond returns number of nanoseconds since the last full microsecond.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+quarter = _clone_signature(day_of_week)
+"""
+Extract quarter of year number.
+
+First quarter maps to 1 and fourth quarter maps to 4.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+second = _clone_signature(hour)
+"""
+Extract second values.
+
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+subsecond = _clone_signature(hour)
+"""
+Extract subsecond values.
+
+Subsecond returns the fraction of a second since the last full second.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+us_week = _clone_signature(iso_week)
+"""
+Extract US week of year number.
+
+First US week has the majority (4 or more) of its days in January.
+US week starts on Monday. The week number starts with 1 and can run
+up to 53.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+us_year = _clone_signature(iso_week)
+"""
+Extract US epidemiological year number.
+
+First week of US epidemiological year has the majority (4 or more) of
+its days in January. Last week of US epidemiological year has the
+year's last Wednesday in it. US epidemiological week starts on Sunday.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+year = _clone_signature(iso_week)
+"""
+Extract year number.
+
+Null values emit null.
+An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def week( + values: lib.TimestampScalar, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def week( + values: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def week( + values: Expression, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def week(*args, **kwargs): + """ + Extract week of year number. + + First week has the majority (4 or more) of its days in January. + Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using + DayOfWeekOptions.count_from_zero. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). + first_week_is_fully_in_year : bool, default False + If True, week number 0 is fully in January. + If False, a week that begins on December 29, 30 or 31 is considered + to be week number 0 of the following year. + options : pyarrow.compute.WeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def year_month_day( + values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructScalar: ... +@overload +def year_month_day( + values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructArray: ... +@overload +def year_month_day( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def year_month_day(*args, **kwargs): + """ + Extract (year, month, day) struct. + + Null values emit null. + An error is returned in the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.24 Temporal difference ========================= +def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Compute the number of days and milliseconds between two timestamps. + + Returns the number of days and milliseconds from `start` to `end`. 
+    That is, first the difference in days is computed as if both
+    timestamps were truncated to the day, then the difference between the times
+    of the two timestamps is computed as if both times were truncated to the
+    millisecond.
+    Null values return null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def days_between(
+    start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Scalar | lib.Int64Array:
+    """
+    Compute the number of days between two timestamps.
+
+    Returns the number of day boundaries crossed from `start` to `end`.
+    That is, the difference is calculated as if the timestamps were
+    truncated to the day.
+    Null values emit null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+hours_between = _clone_signature(days_between)
+"""
+Compute the number of hours between two timestamps.
+
+Returns the number of hour boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the hour.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+microseconds_between = _clone_signature(days_between)
+"""
+Compute the number of microseconds between two timestamps.
+
+Returns the number of microsecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the microsecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+milliseconds_between = _clone_signature(days_between)
+"""
+Compute the number of millisecond boundaries between two timestamps.
+
+Returns the number of millisecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the millisecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+minutes_between = _clone_signature(days_between)
+"""
+Compute the number of minute boundaries between two timestamps.
+
+Returns the number of minute boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the minute.
+Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def month_day_nano_interval_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: + """ + Compute the number of months, days and nanoseconds between two timestamps. + + Returns the number of months, days, and nanoseconds from `start` to `end`. + That is, first the difference in months is computed as if both timestamps + were truncated to the months, then the difference between the days + is computed, and finally the difference between the times of the two + timestamps is computed as if both times were truncated to the nanosecond. + Null values return null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Compute the number of months between two timestamps. + + Returns the number of month boundaries crossed from `start` to `end`. + That is, the difference is calculated as if the timestamps were + truncated to the month. + Null values emit null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +nanoseconds_between = _clone_signature(days_between) +""" +Compute the number of nanoseconds between two timestamps. + +Returns the number of nanosecond boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the nanosecond. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +quarters_between = _clone_signature(days_between) +""" +Compute the number of quarters between two timestamps. + +Returns the number of quarter start boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the quarter. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +seconds_between = _clone_signature(days_between) +""" +Compute the number of seconds between two timestamps. + +Returns the number of second boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the second. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def weeks_between( + start, + end, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array: + """ + Compute the number of weeks between two timestamps. + + Returns the number of week boundaries crossed from `start` to `end`. + That is, the difference is calculated as if the timestamps were + truncated to the week. + Null values emit null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +years_between = _clone_signature(days_between) +""" +Compute the number of years between two timestamps. + +Returns the number of year boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the year. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.25 Timezone handling ========================= +@overload +def assume_timezone( + timestamps: lib.TimestampScalar, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar: ... +@overload +def assume_timezone( + timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... +@overload +def assume_timezone( + timestamps: Expression, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def assume_timezone(*args, **kwargs): + """ + Convert naive timestamp to timezone-aware timestamp. + + Input timestamps are assumed to be relative to the timezone given in the + `timezone` option. They are converted to UTC-relative timestamps and + the output type has its timezone set to the value of the `timezone` + option. Null values emit null. + This function is meant to be used when an external system produces + "timezone-naive" timestamps which need to be converted to + "timezone-aware" timestamps. An error is returned if the timestamps + already have a defined timezone. 
+ + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + timezone : str + Timezone to assume for the input. + ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + options : pyarrow.compute.AssumeTimezoneOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def local_timestamp( + timestamps: lib.TimestampScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.TimestampScalar: ... +@overload +def local_timestamp( + timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... +@overload +def local_timestamp( + timestamps: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def local_timestamp(*args, **kwargs): + """ + Convert timestamp to a timezone-naive local time timestamp. + + LocalTimestamp converts timezone-aware timestamp to local timestamp + of the given timestamp's timezone and removes timezone metadata. + Alternative name for this timestamp is also wall clock time. + If input is in UTC or without timezone, then unchanged input values + without timezone metadata are returned. + Null values emit null. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.26 Random number generation ========================= +def random( + n: int, + *, + initializer: Literal["system"] | int = "system", + options: RandomOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Generate numbers in the range [0, 1). + + Generated values are uniformly-distributed, double-precision + in range [0, 1). Algorithm and seed can be changed via RandomOptions. + + Parameters + ---------- + n : int + Number of values to generate, must be greater than or equal to 0 + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + options : pyarrow.compute.RandomOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3. Array-wise (“vector”) functions ========================= + +# ========================= 3.1 Cumulative Functions ========================= +@overload +def cumulative_sum( + values: _NumericArrayT, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def cumulative_sum( + values: Expression, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... 
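To make the timezone-handling and random-generation stubs above concrete, a small usage sketch follows. It is illustrative only and not part of the patch; it assumes a recent pyarrow and an available IANA timezone database, and the zone name used is an arbitrary example.

```python
# Minimal sketch of assume_timezone / local_timestamp / random.
from datetime import datetime

import pyarrow as pa
import pyarrow.compute as pc

# A timezone-naive timestamp array (timestamp[us], no tz metadata).
naive = pa.array([datetime(2020, 9, 13, 12, 26, 40)])

# assume_timezone interprets the naive values as local wall-clock time in the
# given zone and returns UTC-relative, timezone-aware timestamps.
aware = pc.assume_timezone(naive, timezone="Europe/Ljubljana")
print(aware.type)  # timestamp[us, tz=Europe/Ljubljana]

# local_timestamp goes the other way: back to naive wall-clock values.
print(pc.local_timestamp(aware).type)  # timestamp[us]

# random(n) draws n uniformly distributed doubles in [0, 1); passing an
# integer initializer gives a reproducible sequence.
print(pc.random(3, initializer=42))
```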
+def cumulative_sum(*args, **kwargs): + """ + Compute the cumulative sum over a numeric input. + + `values` must be numeric. Return an array/chunked array which is the + cumulative sum computed over `values`. Results will wrap around on + integer overflow. Use function "cumulative_sum_checked" if you want + overflow to return an error. The default start is 0. + + Parameters + ---------- + values : Array-like + Argument to compute function. + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +cumulative_sum_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative sum over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative sum computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_sum". The default start is 0. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_prod = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. Results will wrap around on +integer overflow. Use function "cumulative_prod_checked" if you want +overflow to return an error. The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_prod_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_prod". The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. 
+options : pyarrow.compute.CumulativeOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+cumulative_max = _clone_signature(cumulative_sum)
+"""
+Compute the cumulative max over a numeric input.
+
+`values` must be numeric. Return an array/chunked array which is the
+cumulative max computed over `values`. The default start is the minimum
+value of input type (so that any other value will replace the
+start as the new maximum).
+
+Parameters
+----------
+values : Array-like
+    Argument to compute function.
+start : Scalar, default None
+    Starting value for the cumulative operation. If none is given,
+    a default value depending on the operation and input type is used.
+skip_nulls : bool, default False
+    When false, the first encountered null is propagated.
+options : pyarrow.compute.CumulativeOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+cumulative_min = _clone_signature(cumulative_sum)
+"""
+Compute the cumulative min over a numeric input.
+
+`values` must be numeric. Return an array/chunked array which is the
+cumulative min computed over `values`. The default start is the maximum
+value of input type (so that any other value will replace the
+start as the new minimum).
+
+Parameters
+----------
+values : Array-like
+    Argument to compute function.
+start : Scalar, default None
+    Starting value for the cumulative operation. If none is given,
+    a default value depending on the operation and input type is used.
+skip_nulls : bool, default False
+    When false, the first encountered null is propagated.
+options : pyarrow.compute.CumulativeOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+cumulative_mean = _clone_signature(cumulative_sum)
+"""
+Compute the cumulative mean over a numeric input.
+
+`values` must be numeric. Return an array/chunked array which is the
+cumulative mean computed over `values`.
+
+Parameters
+----------
+values : Array-like
+    Argument to compute function.
+start : Scalar, default None
+    Starting value for the cumulative operation. If none is given,
+    a default value depending on the operation and input type is used.
+skip_nulls : bool, default False
+    When false, the first encountered null is propagated.
+options : pyarrow.compute.CumulativeOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+# ========================= 3.2 Associative transforms =========================
+
+@overload
+def dictionary_encode(
+    array: _ScalarOrArrayT,
+    /,
+    null_encoding: Literal["mask", "encode"] = "mask",
+    *,
+    options=None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT: ...
+@overload
+def dictionary_encode(
+    array: Expression,
+    /,
+    null_encoding: Literal["mask", "encode"] = "mask",
+    *,
+    options=None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def unique(array: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ...
+@overload +def unique(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +@overload +def value_counts( + array: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructArray: ... +@overload +def value_counts( + array: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... + +# ========================= 3.3 Selections ========================= +@overload +def array_filter( + array: _ArrayT, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... +@overload +def array_filter( + array: Expression, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def array_take( + array: _ArrayT, + indices: list[int] + | list[int | None] + | lib.Int16Array + | lib.Int32Array + | lib.Int64Array + | lib.ChunkedArray[lib.Int16Scalar] + | lib.ChunkedArray[lib.Int32Scalar] + | lib.ChunkedArray[lib.Int64Scalar], + /, + *, + boundscheck: bool = True, + options: TakeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... +@overload +def array_take( + array: Expression, + indices: list[int] + | list[int | None] + | lib.Int16Array + | lib.Int32Array + | lib.Int64Array + | lib.ChunkedArray[lib.Int16Scalar] + | lib.ChunkedArray[lib.Int32Scalar] + | lib.ChunkedArray[lib.Int64Scalar], + /, + *, + boundscheck: bool = True, + options: TakeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def drop_null(input: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... +@overload +def drop_null( + input: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... + +filter = array_filter +take = array_take +""" +Select values (or records) from array- or table-like data given integer +selection indices. + +The result will be of the same type(s) as the input, with elements taken +from the input array (or record batch / table fields) at the given +indices. If an index is null then the corresponding value in the output +will be null. + +Parameters +---------- +data : Array, ChunkedArray, RecordBatch, or Table +indices : Array, ChunkedArray + Must be of integer type +boundscheck : boolean, default True + Whether to boundscheck the indices. If False and there is an out of + bounds index, will likely cause the process to crash. +memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + +Returns +------- +result : depends on inputs + Selected values for the given indices + +Examples +-------- +>>> import pyarrow as pa +>>> arr = pa.array(["a", "b", "c", None, "e", "f"]) +>>> indices = pa.array([0, None, 4, 3]) +>>> arr.take(indices) + +[ + "a", + null, + "e", + null +] +""" + +# ========================= 3.4 Containment tests ========================= +@overload +def indices_nonzero( + values: lib.BooleanArray + | lib.NullArray + | NumericArray + | lib.Decimal128Array + | lib.Decimal256Array, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... 
+@overload
+def indices_nonzero(
+    values: Expression,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def indices_nonzero(*args, **kwargs):
+    """
+    Return the indices of the values in the array that are non-zero.
+
+    For each input value, check if it's zero, false or null. Emit the index
+    of the value in the array if it's none of those.
+
+    Parameters
+    ----------
+    values : Array-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 3.5 Sorts and partitions =========================
+@overload
+def array_sort_indices(
+    array: lib.Array | lib.ChunkedArray,
+    /,
+    order: _Order = "ascending",
+    *,
+    null_placement: _Placement = "at_end",
+    options: ArraySortOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array: ...
+@overload
+def array_sort_indices(
+    array: Expression,
+    /,
+    order: _Order = "ascending",
+    *,
+    null_placement: _Placement = "at_end",
+    options: ArraySortOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def array_sort_indices(*args, **kwargs):
+    """
+    Return the indices that would sort an array.
+
+    This function computes an array of indices that define a stable sort
+    of the input array. By default, null values are considered greater
+    than any other value and are therefore sorted at the end of the array.
+    For floating-point types, NaNs are considered greater than any
+    other non-null value, but smaller than null values.
+
+    The handling of nulls and NaNs can be changed in ArraySortOptions.
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    order : str, default "ascending"
+        Which order to sort values in.
+        Accepted values are "ascending", "descending".
+    null_placement : str, default "at_end"
+        Where nulls in the input should be sorted.
+        Accepted values are "at_start", "at_end".
+    options : pyarrow.compute.ArraySortOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def partition_nth_indices(
+    array: lib.Array | lib.ChunkedArray,
+    /,
+    pivot: int,
+    *,
+    null_placement: _Placement = "at_end",
+    options: PartitionNthOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array: ...
+@overload
+def partition_nth_indices(
+    array: Expression,
+    /,
+    pivot: int,
+    *,
+    null_placement: _Placement = "at_end",
+    options: PartitionNthOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def partition_nth_indices(*args, **kwargs):
+    """
+    Return the indices that would partition an array around a pivot.
+
+    This function computes an array of indices that define a non-stable
+    partial sort of the input array.
+
+    The output is such that the `N`'th index points to the `N`'th element
+    of the input in sorted order, and all indices before the `N`'th point
+    to elements in the input less or equal to elements at or after the `N`'th.
+
+    By default, null values are considered greater than any other value
+    and are therefore partitioned towards the end of the array.
+    For floating-point types, NaNs are considered greater than any
+    other non-null value, but smaller than null values.
+
+    The pivot index `N` must be given in PartitionNthOptions.
+ The handling of nulls and NaNs can also be changed in PartitionNthOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.PartitionNthOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def rank( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + options: RankOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: + """ + Compute ordinal ranks of an array (1-based). + + This function computes a rank of the input array. + By default, null values are considered greater than any other value and + are therefore sorted at the end of the input. For floating-point types, + NaNs are considered greater than any other non-null value, but smaller + than null values. The default tiebreaker is to assign ranks in order of + when ties appear in the input. + + The handling of nulls, NaNs and tiebreakers can be changed in RankOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + tiebreaker : str, default "first" + Configure how ties between equal values are handled. + Accepted values are: + + - "min": Ties get the smallest possible rank in sorted order. + - "max": Ties get the largest possible rank in sorted order. + - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + options : pyarrow.compute.RankOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def select_k_unstable( + input: lib.Array | lib.ChunkedArray, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def select_k_unstable( + input: Expression, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def select_k_unstable(*args, **kwargs): + """ + Select the indices of the first `k` ordered elements from the input. + + This function selects an array of indices of the first `k` ordered elements + from the `input` array, record batch or table specified in the column keys + (`options.sort_keys`). Output is not guaranteed to be stable. 
+ Null values are considered greater than any other value and are + therefore ordered at the end. For floating-point types, NaNs are considered + greater than any other non-null value, but smaller than null values. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + options : pyarrow.compute.SelectKOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def sort_indices( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def sort_indices( + input: Expression, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def sort_indices(*args, **kwargs): + """ + Return the indices that would sort an array, record batch or table. + + This function computes an array of indices that define a stable sort + of the input array, record batch or table. By default, null values are + considered greater than any other value and are therefore sorted at the + end of the input. For floating-point types, NaNs are considered greater + than any other non-null value, but smaller than null values. + + The handling of nulls and NaNs can be changed in SortOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.SortOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3.6 Structural transforms ========================= +@overload +def list_element( + lists: Expression, index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def list_element( + lists: lib.Array[ListScalar[_DataTypeT]], + index: ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array[lib.Scalar[_DataTypeT]]: ... +@overload +def list_element( + lists: lib.ChunkedArray[ListScalar[_DataTypeT]], + index: ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... 
+@overload
+def list_element(
+    lists: ListScalar[_DataTypeT],
+    index: ScalarLike,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Scalar[_DataTypeT]: ...
+def list_element(*args, **kwargs):
+    """
+    Compute elements of nested list values using an index.
+
+    `lists` must have a list-like type.
+    For each value in each list of `lists`, the element at `index`
+    is emitted. Null values emit a null in the output.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    index : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def list_flatten(
+    lists: Expression,
+    /,
+    recursive: bool = False,
+    *,
+    options: ListFlattenOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def list_flatten(
+    lists: ArrayOrChunkedArray[ListScalar[Any]],
+    /,
+    recursive: bool = False,
+    *,
+    options: ListFlattenOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any]: ...
+def list_flatten(*args, **kwargs):
+    """
+    Flatten list values.
+
+    `lists` must have a list-like type (lists, list-views, and
+    fixed-size lists).
+    Return an array with the top list level flattened unless
+    `recursive` is set to true in ListFlattenOptions. When that
+    is the case, flattening happens recursively until a non-list
+    array is formed.
+
+    Null list values do not emit anything to the output.
+
+    Parameters
+    ----------
+    lists : Array-like
+        Argument to compute function.
+    recursive : bool, default False
+        When True, the list array is flattened recursively until an array
+        of non-list values is formed.
+    options : pyarrow.compute.ListFlattenOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def list_parent_indices(
+    lists: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+@overload
+def list_parent_indices(
+    lists: ArrayOrChunkedArray[Any], /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Array: ...
+def list_parent_indices(*args, **kwargs):
+    """
+    Compute parent indices of nested list values.
+
+    `lists` must have a list-like or list-view type.
+    For each value in each list of `lists`, the top-level list index
+    is emitted.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def list_slice(
+    lists: Expression,
+    /,
+    start: int,
+    stop: int | None = None,
+    step: int = 1,
+    return_fixed_size_list: bool | None = None,
+    *,
+    options: ListSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def list_slice(
+    lists: ArrayOrChunkedArray[Any],
+    /,
+    start: int,
+    stop: int | None = None,
+    step: int = 1,
+    return_fixed_size_list: bool | None = None,
+    *,
+    options: ListSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any]: ...
+def list_slice(*args, **kwargs):
+    """
+    Compute slice of list-like array.
+
+    `lists` must have a list-like type.
+    For each list element, compute a slice, returning a new list array.
+    A variable or fixed size list array is returned, depending on options.
+ + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing inner list elements (inclusive). + stop : Optional[int], default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. (NotImplemented) + step : int, default 1 + Slice step. + return_fixed_size_list : Optional[bool], default None + Whether to return a FixedSizeListArray. If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + options : pyarrow.compute.ListSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def map_lookup( + container, + /, + query_key, + occurrence: str, + *, + options: MapLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Find the items corresponding to a given key in a Map. + + For a given query key (passed via MapLookupOptions), extract + either the FIRST, LAST or ALL items from a Map that have + matching keys. + + Parameters + ---------- + container : Array-like or scalar-like + Argument to compute function. + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + options : pyarrow.compute.MapLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def struct_field( + values, + /, + indices, + *, + options: StructFieldOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Extract children of a struct or union by index. + + Given a list of indices (passed via StructFieldOptions), extract + the child array or scalar with the given child index, recursively. + + For union inputs, nulls are emitted for union values that reference + a different child than specified. Also, the indices are always + in physical order, not logical type codes - for example, the first + child is always index 0. + + An empty list of indices returns the argument unchanged. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + options : pyarrow.compute.StructFieldOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values backward to fill null slots. + + Given an array, propagate next valid observation backward to previous valid + or nothing if all next values are null. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values forward to fill null slots. 
+
+    Given an array, propagate last valid observation forward to next valid
+    or nothing if all previous values are null.
+
+    Parameters
+    ----------
+    values : Array-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def replace_with_mask(
+    values,
+    mask: list[bool] | list[bool | None] | BooleanArray,
+    replacements,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+):
+    """
+    Replace items selected with a mask.
+
+    Given an array and a boolean mask (either scalar or of equal length),
+    along with replacement values (either scalar or array),
+    each element of the array for which the corresponding mask element is
+    true will be replaced by the next value from the replacements,
+    or with null if the mask is null.
+    Hence, for replacement arrays, len(replacements) == sum(mask == true).
+
+    Parameters
+    ----------
+    values : Array-like
+        Argument to compute function.
+    mask : Array-like
+        Argument to compute function.
+    replacements : Array-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 3.7 Pairwise functions =========================
+@overload
+def pairwise_diff(
+    input: _NumericOrTemporalArrayT,
+    /,
+    period: int = 1,
+    *,
+    options: PairwiseOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _NumericOrTemporalArrayT: ...
+@overload
+def pairwise_diff(
+    input: Expression,
+    /,
+    period: int = 1,
+    *,
+    options: PairwiseOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def pairwise_diff(*args, **kwargs):
+    """
+    Compute first order difference of an array.
+
+    Computes the first order difference of an array. It internally calls
+    the scalar function "subtract" to compute differences, so its
+    behavior and supported types are the same as "subtract".
+    The period can be specified in :struct:`PairwiseOptions`.
+
+    Results will wrap around on integer overflow. Use function
+    "pairwise_diff_checked" if you want overflow to return an error.
+
+    Parameters
+    ----------
+    input : Array-like
+        Argument to compute function.
+    period : int, default 1
+        Period for applying the period function.
+    options : pyarrow.compute.PairwiseOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+pairwise_diff_checked = _clone_signature(pairwise_diff)
+"""
+Compute first order difference of an array.
+
+Computes the first order difference of an array. It internally calls
+the scalar function "subtract_checked" to compute differences, so its
+behavior and supported types are the same as "subtract_checked".
+The period can be specified in :struct:`PairwiseOptions`.
+
+This function returns an error on overflow. For a variant that doesn't
+fail on overflow, use function "pairwise_diff".
+
+Parameters
+----------
+input : Array-like
+    Argument to compute function.
+period : int, default 1
+    Period for applying the period function.
+options : pyarrow.compute.PairwiseOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
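+
+Examples
+--------
+A minimal, representative example (the array repr line is omitted and the
+exact output has not been verified against a specific pyarrow version):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.pairwise_diff_checked(pa.array([1, 4, 9, 16]))
+[
+  null,
+  3,
+  5,
+  7
+]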
+""" diff --git a/python/stubs/csv.pyi b/python/stubs/csv.pyi new file mode 100644 index 00000000000..510229d7e72 --- /dev/null +++ b/python/stubs/csv.pyi @@ -0,0 +1,27 @@ +from pyarrow._csv import ( + ISO8601, + ConvertOptions, + CSVStreamingReader, + CSVWriter, + InvalidRow, + ParseOptions, + ReadOptions, + WriteOptions, + open_csv, + read_csv, + write_csv, +) + +__all__ = [ + "ISO8601", + "ConvertOptions", + "CSVStreamingReader", + "CSVWriter", + "InvalidRow", + "ParseOptions", + "ReadOptions", + "WriteOptions", + "open_csv", + "read_csv", + "write_csv", +] diff --git a/python/stubs/cuda.pyi b/python/stubs/cuda.pyi new file mode 100644 index 00000000000..e11baf7d4e7 --- /dev/null +++ b/python/stubs/cuda.pyi @@ -0,0 +1,25 @@ +from pyarrow._cuda import ( + BufferReader, + BufferWriter, + Context, + CudaBuffer, + HostBuffer, + IpcMemHandle, + new_host_buffer, + read_message, + read_record_batch, + serialize_record_batch, +) + +__all__ = [ + "BufferReader", + "BufferWriter", + "Context", + "CudaBuffer", + "HostBuffer", + "IpcMemHandle", + "new_host_buffer", + "read_message", + "read_record_batch", + "serialize_record_batch", +] diff --git a/python/stubs/dataset.pyi b/python/stubs/dataset.pyi new file mode 100644 index 00000000000..98f1a38aa85 --- /dev/null +++ b/python/stubs/dataset.pyi @@ -0,0 +1,229 @@ +from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload + +from _typeshed import StrPath +from pyarrow._dataset import ( + CsvFileFormat, + CsvFragmentScanOptions, + Dataset, + DatasetFactory, + DirectoryPartitioning, + FeatherFileFormat, + FileFormat, + FileFragment, + FilenamePartitioning, + FileSystemDataset, + FileSystemDatasetFactory, + FileSystemFactoryOptions, + FileWriteOptions, + Fragment, + FragmentScanOptions, + HivePartitioning, + InMemoryDataset, + IpcFileFormat, + IpcFileWriteOptions, + JsonFileFormat, + JsonFragmentScanOptions, + Partitioning, + PartitioningFactory, + Scanner, + TaggedRecordBatch, + UnionDataset, + UnionDatasetFactory, + WrittenFile, + get_partition_keys, +) +from pyarrow._dataset_orc import OrcFileFormat +from pyarrow._dataset_parquet import ( + ParquetDatasetFactory, + ParquetFactoryOptions, + ParquetFileFormat, + ParquetFileFragment, + ParquetFileWriteOptions, + ParquetFragmentScanOptions, + ParquetReadOptions, + RowGroupInfo, +) +from pyarrow._dataset_parquet_encryption import ( + ParquetDecryptionConfig, + ParquetEncryptionConfig, +) +from pyarrow.compute import Expression, field, scalar +from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table + +from ._fs import SupportedFileSystem + +_orc_available: bool +_parquet_available: bool + +__all__ = [ + "CsvFileFormat", + "CsvFragmentScanOptions", + "Dataset", + "DatasetFactory", + "DirectoryPartitioning", + "FeatherFileFormat", + "FileFormat", + "FileFragment", + "FilenamePartitioning", + "FileSystemDataset", + "FileSystemDatasetFactory", + "FileSystemFactoryOptions", + "FileWriteOptions", + "Fragment", + "FragmentScanOptions", + "HivePartitioning", + "InMemoryDataset", + "IpcFileFormat", + "IpcFileWriteOptions", + "JsonFileFormat", + "JsonFragmentScanOptions", + "Partitioning", + "PartitioningFactory", + "Scanner", + "TaggedRecordBatch", + "UnionDataset", + "UnionDatasetFactory", + "WrittenFile", + "get_partition_keys", + # Orc + "OrcFileFormat", + # Parquet + "ParquetDatasetFactory", + "ParquetFactoryOptions", + "ParquetFileFormat", + "ParquetFileFragment", + "ParquetFileWriteOptions", + "ParquetFragmentScanOptions", + "ParquetReadOptions", + "RowGroupInfo", + # 
Parquet Encryption + "ParquetDecryptionConfig", + "ParquetEncryptionConfig", + # Compute + "Expression", + "field", + "scalar", + # Dataset + "partitioning", + "parquet_dataset", + "write_dataset", +] + +_DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] + +@overload +def partitioning( + schema: Schema, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + field_names: list[str], + *, + flavor: Literal["filename"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + *, + flavor: Literal["hive"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +def parquet_dataset( + metadata_path: StrPath, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + format: ParquetFileFormat | None = None, + partitioning: Partitioning | PartitioningFactory | None = None, + partition_base_dir: str | None = None, +) -> FileSystemDataset: ... +@overload +def dataset( + source: StrPath | Sequence[StrPath], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> FileSystemDataset: ... +@overload +def dataset( + source: list[Dataset], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> UnionDataset: ... +@overload +def dataset( + source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... +@overload +def dataset( + source: RecordBatch | Table, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... 
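+
+# Illustrative usage sketch for the `dataset()` overloads above (not part of the
+# stub API); the paths, column name, and partition key below are hypothetical.
+#
+#   import pyarrow.dataset as ds
+#   dset = ds.dataset("data/", format="parquet", partitioning="hive")
+#   table = dset.to_table(filter=ds.field("year") == 2024)
+#   ds.write_dataset(table, "out/", format="parquet", partitioning=["year"])
+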
+def write_dataset(
+    data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch],
+    base_dir: StrPath,
+    *,
+    basename_template: str | None = None,
+    format: FileFormat | _DatasetFormat | None = None,
+    partitioning: Partitioning | list[str] | None = None,
+    partitioning_flavor: str | None = None,
+    schema: Schema | None = None,
+    filesystem: SupportedFileSystem | None = None,
+    file_options: FileWriteOptions | None = None,
+    use_threads: bool = True,
+    max_partitions: int = 1024,
+    max_open_files: int = 1024,
+    max_rows_per_file: int = 0,
+    min_rows_per_group: int = 0,
+    max_rows_per_group: int = 1024 * 1024,
+    file_visitor: Callable[[WrittenFile], None] | None = None,
+    existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error",
+    create_dir: bool = True,
+): ...
diff --git a/python/stubs/feather.pyi b/python/stubs/feather.pyi
new file mode 100644
index 00000000000..9451ee15763
--- /dev/null
+++ b/python/stubs/feather.pyi
@@ -0,0 +1,50 @@
+from typing import IO, Literal
+
+import pandas as pd
+
+from _typeshed import StrPath
+from pyarrow._feather import FeatherError
+from pyarrow.lib import Table
+
+__all__ = [
+    "FeatherError",
+    "FeatherDataset",
+    "check_chunked_overflow",
+    "write_feather",
+    "read_feather",
+    "read_table",
+]
+
+class FeatherDataset:
+    path_or_paths: str | list[str]
+    validate_schema: bool
+
+    def __init__(self, path_or_paths: str | list[str], validate_schema: bool = True) -> None: ...
+    def read_table(self, columns: list[str] | None = None) -> Table: ...
+    def validate_schemas(self, piece, table: Table) -> None: ...
+    def read_pandas(
+        self, columns: list[str] | None = None, use_threads: bool = True
+    ) -> pd.DataFrame: ...
+
+def check_chunked_overflow(name: str, col) -> None: ...
+def write_feather(
+    df: pd.DataFrame | Table,
+    dest: StrPath | IO,
+    compression: Literal["zstd", "lz4", "uncompressed"] | None = None,
+    compression_level: int | None = None,
+    chunksize: int | None = None,
+    version: Literal[1, 2] = 2,
+) -> None: ...
+def read_feather(
+    source: StrPath | IO,
+    columns: list[str] | None = None,
+    use_threads: bool = True,
+    memory_map: bool = False,
+    **kwargs,
+) -> pd.DataFrame: ...
+def read_table(
+    source: StrPath | IO,
+    columns: list[str] | None = None,
+    memory_map: bool = False,
+    use_threads: bool = True,
+) -> Table: ...
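+
+# Illustrative round-trip sketch for the functions above (not part of the stub
+# API); the file name is hypothetical.
+#
+#   import pandas as pd
+#   from pyarrow import feather
+#   feather.write_feather(pd.DataFrame({"a": [1, 2, 3]}), "data.feather")
+#   table = feather.read_table("data.feather")
+#   df = feather.read_feather("data.feather")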
diff --git a/python/stubs/flight.pyi b/python/stubs/flight.pyi new file mode 100644 index 00000000000..9b806ccf305 --- /dev/null +++ b/python/stubs/flight.pyi @@ -0,0 +1,95 @@ +from pyarrow._flight import ( + Action, + ActionType, + BasicAuth, + CallInfo, + CertKeyPair, + ClientAuthHandler, + ClientMiddleware, + ClientMiddlewareFactory, + DescriptorType, + FlightCallOptions, + FlightCancelledError, + FlightClient, + FlightDataStream, + FlightDescriptor, + FlightEndpoint, + FlightError, + FlightInfo, + FlightInternalError, + FlightMetadataReader, + FlightMetadataWriter, + FlightMethod, + FlightServerBase, + FlightServerError, + FlightStreamChunk, + FlightStreamReader, + FlightStreamWriter, + FlightTimedOutError, + FlightUnauthenticatedError, + FlightUnauthorizedError, + FlightUnavailableError, + FlightWriteSizeExceededError, + GeneratorStream, + Location, + MetadataRecordBatchReader, + MetadataRecordBatchWriter, + RecordBatchStream, + Result, + SchemaResult, + ServerAuthHandler, + ServerCallContext, + ServerMiddleware, + ServerMiddlewareFactory, + Ticket, + TracingServerMiddlewareFactory, + connect, +) + +__all__ = [ + "Action", + "ActionType", + "BasicAuth", + "CallInfo", + "CertKeyPair", + "ClientAuthHandler", + "ClientMiddleware", + "ClientMiddlewareFactory", + "DescriptorType", + "FlightCallOptions", + "FlightCancelledError", + "FlightClient", + "FlightDataStream", + "FlightDescriptor", + "FlightEndpoint", + "FlightError", + "FlightInfo", + "FlightInternalError", + "FlightMetadataReader", + "FlightMetadataWriter", + "FlightMethod", + "FlightServerBase", + "FlightServerError", + "FlightStreamChunk", + "FlightStreamReader", + "FlightStreamWriter", + "FlightTimedOutError", + "FlightUnauthenticatedError", + "FlightUnauthorizedError", + "FlightUnavailableError", + "FlightWriteSizeExceededError", + "GeneratorStream", + "Location", + "MetadataRecordBatchReader", + "MetadataRecordBatchWriter", + "RecordBatchStream", + "Result", + "SchemaResult", + "ServerAuthHandler", + "ServerCallContext", + "ServerMiddleware", + "ServerMiddlewareFactory", + "Ticket", + "TracingServerMiddlewareFactory", + "connect", +] diff --git a/python/stubs/fs.pyi b/python/stubs/fs.pyi new file mode 100644 index 00000000000..6bf75616c13 --- /dev/null +++ b/python/stubs/fs.pyi @@ -0,0 +1,77 @@ +from pyarrow._fs import ( # noqa + FileSelector, + FileType, + FileInfo, + FileSystem, + LocalFileSystem, + SubTreeFileSystem, + _MockFileSystem, + FileSystemHandler, + PyFileSystem, + SupportedFileSystem, +) +from pyarrow._azurefs import AzureFileSystem +from pyarrow._hdfs import HadoopFileSystem +from pyarrow._gcsfs import GcsFileSystem +from pyarrow._s3fs import ( # noqa + AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, + S3FileSystem, + S3LogLevel, + S3RetryStrategy, + ensure_s3_initialized, + finalize_s3, + ensure_s3_finalized, + initialize_s3, + resolve_s3_region, +) + +FileStats = FileInfo + +def copy_files( + source: str, + destination: str, + source_filesystem: SupportedFileSystem | None = None, + destination_filesystem: SupportedFileSystem | None = None, + *, + chunk_size: int = 1024 * 1024, + use_threads: bool = True, +) -> None: ... + +class FSSpecHandler(FileSystemHandler): # type: ignore[misc] + fs: SupportedFileSystem + def __init__(self, fs: SupportedFileSystem) -> None: ... 
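+
+# Illustrative usage sketch (not part of the stub API); the paths below are
+# hypothetical.
+#
+#   from pyarrow import fs
+#   local = fs.LocalFileSystem()
+#   infos = local.get_file_info(fs.FileSelector("/tmp", recursive=True))
+#   fs.copy_files("/tmp/src.parquet", "/tmp/dst.parquet", source_filesystem=local)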
+ +__all__ = [ + # _fs + "FileSelector", + "FileType", + "FileInfo", + "FileSystem", + "LocalFileSystem", + "SubTreeFileSystem", + "_MockFileSystem", + "FileSystemHandler", + "PyFileSystem", + # _azurefs + "AzureFileSystem", + # _hdfs + "HadoopFileSystem", + # _gcsfs + "GcsFileSystem", + # _s3fs + "AwsDefaultS3RetryStrategy", + "AwsStandardS3RetryStrategy", + "S3FileSystem", + "S3LogLevel", + "S3RetryStrategy", + "ensure_s3_initialized", + "finalize_s3", + "ensure_s3_finalized", + "initialize_s3", + "resolve_s3_region", + # fs + "FileStats", + "copy_files", + "FSSpecHandler", +] diff --git a/python/stubs/gandiva.pyi b/python/stubs/gandiva.pyi new file mode 100644 index 00000000000..a344f885b29 --- /dev/null +++ b/python/stubs/gandiva.pyi @@ -0,0 +1,65 @@ +from typing import Iterable, Literal + +from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable + +class Node(_Weakrefable): + def return_type(self) -> DataType: ... + +class Expression(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class Condition(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class SelectionVector(_Weakrefable): + def to_array(self) -> Array: ... + +class Projector(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, selection: SelectionVector | None = None + ) -> list[Array]: ... + +class Filter(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32" + ) -> SelectionVector: ... + +class TreeExprBuilder(_Weakrefable): + def make_literal(self, value: float | str | bytes | bool, dtype: DataType) -> Node: ... + def make_expression(self, root_node: Node, return_field: Field) -> Expression: ... + def make_function(self, name: str, children: list[Node], return_type: DataType) -> Node: ... + def make_field(self, field: Field) -> Node: ... + def make_if( + self, condition: Node, this_node: Node, else_node: Node, return_type: DataType + ) -> Node: ... + def make_and(self, children: list[Node]) -> Node: ... + def make_or(self, children: list[Node]) -> Node: ... + def make_in_expression(self, node: Node, values: Iterable, dtype: DataType) -> Node: ... + def make_condition(self, condition: Node) -> Condition: ... + +class Configuration(_Weakrefable): + def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ... + +def make_projector( + schema: Schema, + children: list[Expression], + pool: MemoryPool, + selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE", + configuration: Configuration | None = None, +) -> Projector: ... +def make_filter( + schema: Schema, condition: Condition, configuration: Configuration | None = None +) -> Filter: ... + +class FunctionSignature(_Weakrefable): + def return_type(self) -> DataType: ... + def param_types(self) -> list[DataType]: ... + def name(self) -> str: ... + +def get_registered_function_signatures() -> list[FunctionSignature]: ... 
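+
+# Illustrative usage sketch (not part of the stub API), assuming a single int32
+# column named "x"; the names, values, and types below are hypothetical.
+#
+#   import pyarrow as pa
+#   import pyarrow.gandiva as gandiva
+#   schema = pa.schema([pa.field("x", pa.int32())])
+#   builder = gandiva.TreeExprBuilder()
+#   x = builder.make_field(schema.field("x"))
+#   one = builder.make_literal(1, pa.int32())
+#   node = builder.make_function("add", [x, one], pa.int32())
+#   expr = builder.make_expression(node, pa.field("x_plus_1", pa.int32()))
+#   projector = gandiva.make_projector(schema, [expr], pa.default_memory_pool())
+#   batch = pa.record_batch([pa.array([1, 2, 3], pa.int32())], names=["x"])
+#   (result,) = projector.evaluate(batch)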
diff --git a/python/stubs/interchange/__init__.pyi b/python/stubs/interchange/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/stubs/interchange/buffer.pyi b/python/stubs/interchange/buffer.pyi new file mode 100644 index 00000000000..46673961a75 --- /dev/null +++ b/python/stubs/interchange/buffer.pyi @@ -0,0 +1,58 @@ +import enum + +from pyarrow.lib import Buffer + +class DlpackDeviceType(enum.IntEnum): + """Integer enum for device type codes matching DLPack.""" + + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + +class _PyArrowBuffer: + """ + Data in the buffer is guaranteed to be contiguous in memory. + + Note that there is no dtype attribute present, a buffer can be thought of + as simply a block of memory. However, if the column that the buffer is + attached to has a dtype that's supported by DLPack and ``__dlpack__`` is + implemented, then that dtype information will be contained in the return + value from ``__dlpack__``. + + This distinction is useful to support both data exchange via DLPack on a + buffer and (b) dtypes like variable-length strings which do not have a + fixed number of bytes per element. + """ + def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... + @property + def bufsize(self) -> int: + """ + Buffer size in bytes. + """ + @property + def ptr(self) -> int: + """ + Pointer to start of the buffer as an integer. + """ + def __dlpack__(self): + """ + Produce DLPack capsule (see array API standard). + + Raises: + - TypeError : if the buffer contains unsupported dtypes. + - NotImplementedError : if DLPack support is not implemented + + Useful to have to connect to array libraries. Support optional because + it's not completely trivial to implement for a Python-only library. + """ + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: + """ + Device type and device ID for where the data in the buffer resides. + Uses device type codes matching DLPack. + Note: must be implemented even if ``__dlpack__`` is not. + """ diff --git a/python/stubs/interchange/column.pyi b/python/stubs/interchange/column.pyi new file mode 100644 index 00000000000..e6662867b6b --- /dev/null +++ b/python/stubs/interchange/column.pyi @@ -0,0 +1,252 @@ +import enum + +from typing import Any, Iterable, TypeAlias, TypedDict + +from pyarrow.lib import Array, ChunkedArray + +from .buffer import _PyArrowBuffer + +class DtypeKind(enum.IntEnum): + """ + Integer enum for data types. + + Attributes + ---------- + INT : int + Matches to signed integer data type. + UINT : int + Matches to unsigned integer data type. + FLOAT : int + Matches to floating point data type. + BOOL : int + Matches to boolean data type. + STRING : int + Matches to string data type (UTF-8 encoded). + DATETIME : int + Matches to datetime data type. + CATEGORICAL : int + Matches to categorical data type. + """ + + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + +Dtype: TypeAlias = tuple[DtypeKind, int, str, str] + +class ColumnNullType(enum.IntEnum): + """ + Integer enum for null type representation. + + Attributes + ---------- + NON_NULLABLE : int + Non-nullable column. + USE_NAN : int + Use explicit float NaN value. + USE_SENTINEL : int + Sentinel value besides NaN. + USE_BITMASK : int + The bit is set/unset representing a null on a certain position. + USE_BYTEMASK : int + The byte is set/unset representing a null on a certain position. 
+ """ + + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + +class ColumnBuffers(TypedDict): + data: tuple[_PyArrowBuffer, Dtype] + validity: tuple[_PyArrowBuffer, Dtype] | None + offsets: tuple[_PyArrowBuffer, Dtype] | None + +class CategoricalDescription(TypedDict): + is_ordered: bool + is_dictionary: bool + categories: _PyArrowColumn | None + +class Endianness(enum.Enum): + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + +class NoBufferPresent(Exception): + """Exception to signal that there is no requested buffer.""" + +class _PyArrowColumn: + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + + TBD: Arrow has a separate "null" dtype, and has no separate mask concept. + Instead, it seems to use "children" for both columns with a bit mask, + and for nested dtypes. Unclear whether this is elegant or confusing. + This design requires checking the null representation explicitly. + + The Arrow design requires checking: + 1. the ARROW_FLAG_NULLABLE (for sentinel values) + 2. if a column has two children, combined with one of those children + having a null dtype. + + Making the mask concept explicit seems useful. One null dtype would + not be enough to cover both bit and byte masks, so that would mean + even more checking if we did it the Arrow way. + + TBD: there's also the "chunk" concept here, which is implicit in Arrow as + multiple buffers per array (= column here). Semantically it may make + sense to have both: chunks were meant for example for lazy evaluation + of data which doesn't fit in memory, while multiple buffers per column + could also come from doing a selection operation on a single + contiguous buffer. + + Given these concepts, one would expect chunks to be all of the same + size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), + while multiple buffers could have data-dependent lengths. Not an issue + in pandas if one column is backed by a single NumPy array, but in + Arrow it seems possible. + Are multiple chunks *and* multiple buffers per column necessary for + the purposes of this interchange protocol, or must producers either + reuse the chunk concept for this or copy the data? + + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... + def size(self) -> int: + """ + Size of the column, in elements. + + Corresponds to DataFrame.num_rows() if column is a single chunk; + equal to size of this current chunk otherwise. + + Is a method rather than a property because it may cause a (potentially + expensive) computation for some dataframe implementations. + """ + @property + def offset(self) -> int: + """ + Offset of first element. + + May be > 0 if using chunks; for example for a column with N chunks of + equal size M (only the last chunk may be shorter), + ``offset = n * M``, ``n = 0 .. N-1``. + """ + @property + def dtype(self) -> tuple[DtypeKind, int, str, str]: + """ + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. 
+ + Bit-width : the number of bits as an integer + Format string : data type description format string in Apache Arrow C + Data Interface format. + Endianness : current only native endianness (``=``) is supported + + Notes: + - Kind specifiers are aligned with DLPack where possible (hence the + jump to 20, leave enough room for future extension) + - Masks must be specified as boolean with either bit width 1 (for + bit masks) or 8 (for byte masks). + - Dtype width in bits was preferred over bytes + - Endianness isn't too useful, but included now in case in the + future we need to support non-native endianness + - Went with Apache Arrow format strings over NumPy format strings + because they're more complete from a dataframe perspective + - Format strings are mostly useful for datetime specification, and + for categoricals. + - For categoricals, the format string describes the type of the + categorical in the data buffer. In case of a separate encoding of + the categorical (e.g. an integer to string mapping), this can + be derived from ``self.describe_categorical``. + - Data types not included: complex, Arrow-style null, binary, + decimal, and nested (list, struct, map, union) dtypes. + """ + @property + def describe_categorical(self) -> CategoricalDescription: + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding categorical + values. + + Raises TypeError if the dtype is not categorical + + Returns the dictionary with description on how to interpret the + data buffer: + - "is_ordered" : bool, whether the ordering of dictionary indices + is semantically meaningful. + - "is_dictionary" : bool, whether a mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of + indices to category values (e.g. an array of + cat1, cat2, ...). None if not a dictionary-style + categorical. + + TBD: are there any other in-memory representations that are needed? + """ + @property + def describe_null(self) -> tuple[ColumnNullType, Any]: + """ + Return the missing value (or "null") representation the column dtype + uses, as a tuple ``(kind, value)``. + + Value : if kind is "sentinel value", the actual value. If kind is a bit + mask or a byte mask, the value (0 or 1) indicating a missing value. + None otherwise. + """ + @property + def null_count(self) -> int: + """ + Number of null elements, if known. + + Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the chunks. + + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. + + The returned dictionary has the following contents: + + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. 
None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ diff --git a/python/stubs/interchange/dataframe.pyi b/python/stubs/interchange/dataframe.pyi new file mode 100644 index 00000000000..526a58926a9 --- /dev/null +++ b/python/stubs/interchange/dataframe.pyi @@ -0,0 +1,102 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Any, Iterable, Sequence + +from pyarrow.interchange.column import _PyArrowColumn +from pyarrow.lib import RecordBatch, Table + +class _PyArrowDataFrame: + """ + A data frame class, with only the methods required by the interchange + protocol defined. + + A "data frame" represents an ordered collection of named columns. + A column's "name" must be a unique string. + Columns may be accessed by name or by position. + + This could be a public data frame class, or an object with the methods and + attributes defined on this DataFrame class could be returned from the + ``__dataframe__`` method of a public data frame class in a library adhering + to the dataframe interchange protocol specification. + """ + + def __init__( + self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: ... + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Construct a new exchange object, potentially changing the parameters. + ``nan_as_null`` is a keyword intended for the consumer to tell the + producer to overwrite null values in the data with ``NaN``. + It is intended for cases where the consumer does not support the bit + mask or byte mask that is the producer's native representation. + ``allow_copy`` is a keyword that defines whether or not the library is + allowed to make a copy of the data. For example, copying data would be + necessary if a library supports strided buffers, given that this + protocol specifies contiguous buffers. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything, they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. For avoiding collisions with other + entries, please add name the keys with the name of the library + followed by a period and the desired name, e.g, ``pandas.indexcol``. + """ + def num_columns(self) -> int: + """ + Return the number of columns in the DataFrame. + """ + def num_rows(self) -> int: + """ + Return the number of rows in the DataFrame, if available. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the DataFrame consists of. + """ + def column_names(self) -> Iterable[str]: + """ + Return an iterator yielding the column names. + """ + def get_column(self, i: int) -> _PyArrowColumn: + """ + Return the column at the indicated position. + """ + def get_column_by_name(self, name: str) -> _PyArrowColumn: + """ + Return the column whose name is the indicated name. 
+ """ + def get_columns(self) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the columns. + """ + def select_columns(self, indices: Sequence[int]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by index. + """ + def select_columns_by_name(self, names: Sequence[str]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by name. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: + """ + Return an iterator yielding the chunks. + + By default (None), yields the chunks that the data is stored as by the + producer. If given, ``n_chunks`` must be a multiple of + ``self.num_chunks()``, meaning the producer must subdivide each chunk + before yielding it. + + Note that the producer must ensure that all columns are chunked the + same way. + """ diff --git a/python/stubs/interchange/from_dataframe.pyi b/python/stubs/interchange/from_dataframe.pyi new file mode 100644 index 00000000000..b04b6268975 --- /dev/null +++ b/python/stubs/interchange/from_dataframe.pyi @@ -0,0 +1,244 @@ +from typing import Any, Protocol, TypeAlias + +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table + +from .column import ( + ColumnBuffers, + ColumnNullType, + Dtype, + DtypeKind, +) + +class DataFrameObject(Protocol): + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> Any: ... + +ColumnObject: TypeAlias = Any + +def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: + """ + Build a ``pa.Table`` from any DataFrame supporting the interchange protocol. + + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Table + + Examples + -------- + >>> import pyarrow + >>> from pyarrow.interchange import from_dataframe + + Convert a pandas dataframe to a pyarrow table: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_attendees": [100, 10, 1], + ... "country": ["Italy", "Spain", "Slovenia"], + ... } + ... ) + >>> df + n_attendees country + 0 100 Italy + 1 10 Spain + 2 1 Slovenia + >>> from_dataframe(df) + pyarrow.Table + n_attendees: int64 + country: large_string + ---- + n_attendees: [[100,10,1]] + country: [["Italy","Spain","Slovenia"]] + """ + +def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: + """ + Convert interchange protocol chunk to ``pa.RecordBatch``. + + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.RecordBatch + """ + +def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding one of the primitive dtypes to a PyArrow array. + A primitive type is one of: int, uint, float, bool (1 bit). + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). 
+ + Returns + ------- + pa.Array + """ + +def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding boolean dtype to a PyArrow array. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + """ + +def categorical_column_to_dictionary( + col: ColumnObject, allow_copy: bool = True +) -> DictionaryArray: + """ + Convert a column holding categorical data to a pa.DictionaryArray. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.DictionaryArray + """ + +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: + """Parse datetime `format_str` to interpret the `data`.""" + +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: + """Map column date type to pyarrow date type.""" + +def buffers_to_array( + buffers: ColumnBuffers, + data_type: tuple[DtypeKind, int, str, str], + length: int, + describe_null: ColumnNullType, + offset: int = 0, + allow_copy: bool = True, +) -> Array: + """ + Build a PyArrow array from the passed buffer. + + Parameters + ---------- + buffer : ColumnBuffers + Dictionary containing tuples of underlying buffers and + their associated dtype. + data_type : Tuple[DtypeKind, int, str, str], + Dtype description of the column as a tuple ``(kind, bit-width, format string, + endianness)``. + length : int + The number of values in the array. + describe_null: ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + + Notes + ----- + The returned array doesn't own the memory. The caller of this function + is responsible for keeping the memory owner object alive as long as + the returned PyArrow array is being used. + """ + +def validity_buffer_from_mask( + validity_buff: Buffer, + validity_dtype: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from the passed mask buffer. + + Parameters + ---------- + validity_buff : BufferObject + Tuple of underlying validity buffer and associated dtype. + validity_dtype : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ + +def validity_buffer_nan_sentinel( + data_pa_buffer: Buffer, + data_type: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from NaN or sentinel values. 
+ + Parameters + ---------- + data_pa_buffer : pa.Buffer + PyArrow buffer for the column data. + data_type : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ diff --git a/python/stubs/ipc.pyi b/python/stubs/ipc.pyi new file mode 100644 index 00000000000..c7f2af004d4 --- /dev/null +++ b/python/stubs/ipc.pyi @@ -0,0 +1,123 @@ +from io import IOBase + +import pandas as pd +import pyarrow.lib as lib + +from pyarrow.lib import ( + IpcReadOptions, + IpcWriteOptions, + Message, + MessageReader, + MetadataVersion, + ReadStats, + RecordBatchReader, + WriteStats, + _ReadPandasMixin, + get_record_batch_size, + get_tensor_size, + read_message, + read_record_batch, + read_schema, + read_tensor, + write_tensor, +) + +class RecordBatchStreamReader(lib._RecordBatchStreamReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +class RecordBatchFileReader(lib._RecordBatchFileReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchFileWriter(lib._RecordBatchFileWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +def new_stream( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchStreamWriter: ... +def open_stream( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchStreamReader: ... +def new_file( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchFileWriter: ... +def open_file( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchFileReader: ... +def serialize_pandas( + df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None +) -> lib.Buffer: ... +def deserialize_pandas(buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... 
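+# NOTE (editorial, illustrative only): a minimal usage sketch showing how the
+# typed IPC entry points above are expected to compose; the file name
+# "example.arrow" and the schema are hypothetical and not part of the stubs.
+#
+#   import pyarrow as pa
+#   import pyarrow.ipc as ipc
+#
+#   schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
+#   batch = pa.record_batch([[1, 2], ["a", "b"]], schema=schema)
+#   with ipc.new_file("example.arrow", schema) as writer:  # RecordBatchFileWriter
+#       writer.write_batch(batch)
+#   reader = ipc.open_file("example.arrow")                # RecordBatchFileReader
+#   table = reader.read_all()                              # pa.Table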
+ +__all__ = [ + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "MetadataVersion", + "ReadStats", + "RecordBatchReader", + "WriteStats", + "_ReadPandasMixin", + "get_record_batch_size", + "get_tensor_size", + "read_message", + "read_record_batch", + "read_schema", + "read_tensor", + "write_tensor", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "new_stream", + "open_stream", + "new_file", + "open_file", + "serialize_pandas", + "deserialize_pandas", +] diff --git a/python/stubs/json.pyi b/python/stubs/json.pyi new file mode 100644 index 00000000000..db1d35e0b8b --- /dev/null +++ b/python/stubs/json.pyi @@ -0,0 +1,3 @@ +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json + +__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/stubs/lib.pyi b/python/stubs/lib.pyi new file mode 100644 index 00000000000..1698b55520b --- /dev/null +++ b/python/stubs/lib.pyi @@ -0,0 +1,106 @@ +# ruff: noqa: F403 +from typing import NamedTuple + +from .__lib_pxi.array import * +from .__lib_pxi.benchmark import * +from .__lib_pxi.builder import * +from .__lib_pxi.compat import * +from .__lib_pxi.config import * +from .__lib_pxi.device import * +from .__lib_pxi.error import * +from .__lib_pxi.io import * +from .__lib_pxi.ipc import * +from .__lib_pxi.memory import * +from .__lib_pxi.pandas_shim import * +from .__lib_pxi.scalar import * +from .__lib_pxi.table import * +from .__lib_pxi.tensor import * +from .__lib_pxi.types import * + +class MonthDayNano(NamedTuple): + days: int + months: int + nanoseconds: int + +def cpu_count() -> int: + """ + Return the number of threads to use in parallel operations. + + The number of threads is determined at startup by inspecting the + ``OMP_NUM_THREADS`` and ``OMP_THREAD_LIMIT`` environment variables. + If neither is present, it will default to the number of hardware threads + on the system. It can be modified at runtime by calling + :func:`set_cpu_count()`. + + See Also + -------- + set_cpu_count : Modify the size of this pool. + io_thread_count : The analogous function for the I/O thread pool. + """ + +def set_cpu_count(count: int) -> None: + """ + Set the number of threads to use in parallel operations. + + Parameters + ---------- + count : int + The number of concurrent threads that should be used. + + See Also + -------- + cpu_count : Get the size of this pool. + set_io_thread_count : The analogous function for the I/O thread pool. + """ + +def is_threading_enabled() -> bool: + """ + Returns True if threading is enabled in libarrow. + + If it isn't enabled, then python shouldn't create any + threads either, because we're probably on a system where + threading doesn't work (e.g. Emscripten). 
+ """ + +Type_NA: int +Type_BOOL: int +Type_UINT8: int +Type_INT8: int +Type_UINT16: int +Type_INT16: int +Type_UINT32: int +Type_INT32: int +Type_UINT64: int +Type_INT64: int +Type_HALF_FLOAT: int +Type_FLOAT: int +Type_DOUBLE: int +Type_DECIMAL128: int +Type_DECIMAL256: int +Type_DATE32: int +Type_DATE64: int +Type_TIMESTAMP: int +Type_TIME32: int +Type_TIME64: int +Type_DURATION: int +Type_INTERVAL_MONTH_DAY_NANO: int +Type_BINARY: int +Type_STRING: int +Type_LARGE_BINARY: int +Type_LARGE_STRING: int +Type_FIXED_SIZE_BINARY: int +Type_BINARY_VIEW: int +Type_STRING_VIEW: int +Type_LIST: int +Type_LARGE_LIST: int +Type_LIST_VIEW: int +Type_LARGE_LIST_VIEW: int +Type_MAP: int +Type_FIXED_SIZE_LIST: int +Type_STRUCT: int +Type_SPARSE_UNION: int +Type_DENSE_UNION: int +Type_DICTIONARY: int +Type_RUN_END_ENCODED: int +UnionMode_SPARSE: int +UnionMode_DENSE: int diff --git a/python/stubs/orc.pyi b/python/stubs/orc.pyi new file mode 100644 index 00000000000..2eba8d40a11 --- /dev/null +++ b/python/stubs/orc.pyi @@ -0,0 +1,279 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Literal + +from _typeshed import StrPath + +from . import _orc +from ._fs import SupportedFileSystem +from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table + +class ORCFile: + """ + Reader interface for a single ORC file + + Parameters + ---------- + source : str or pyarrow.NativeFile + Readable source. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader. + """ + + reader: _orc.ORCReader + def __init__(self, source: StrPath | NativeFile | IO) -> None: ... + @property + def metadata(self) -> KeyValueMetadata: + """The file metadata, as an arrow KeyValueMetadata""" + @property + def schema(self) -> Schema: + """The file schema, as an arrow schema""" + @property + def nrows(self) -> int: + """The number of rows in the file""" + @property + def nstripes(self) -> int: + """The number of stripes in the file""" + @property + def file_version(self) -> str: + """Format version of the ORC file, must be 0.11 or 0.12""" + @property + def software_version(self) -> str: + """Software instance and version that wrote this file""" + @property + def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: + """Compression codec of the file""" + @property + def compression_size(self) -> int: + """Number of bytes to buffer for the compression codec in the file""" + @property + def writer(self) -> str: + """Name of the writer that wrote this file. 
+ If the writer is unknown then its Writer ID + (a number) is returned""" + @property + def writer_version(self) -> str: + """Version of the writer""" + @property + def row_index_stride(self) -> int: + """Number of rows per an entry in the row index or 0 + if there is no row index""" + @property + def nstripe_statistics(self) -> int: + """Number of stripe statistics""" + @property + def content_length(self) -> int: + """Length of the data stripes in the file in bytes""" + @property + def stripe_statistics_length(self) -> int: + """The number of compressed bytes in the file stripe statistics""" + @property + def file_footer_length(self) -> int: + """The number of compressed bytes in the file footer""" + @property + def file_postscript_length(self) -> int: + """The number of bytes in the file postscript""" + @property + def file_length(self) -> int: + """The number of bytes in the file""" + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: + """Read a single stripe from the file. + + Parameters + ---------- + n : int + The stripe index + columns : list + If not None, only these columns will be read from the stripe. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e' + + Returns + ------- + pyarrow.RecordBatch + Content of the stripe as a RecordBatch. + """ + def read(self, columns: list[str] | None = None) -> Table: + """Read the whole file. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. Output always follows the + ordering of the file and not the `columns` list. + + Returns + ------- + pyarrow.Table + Content of the file as a Table. + """ + +class ORCWriter: + """ + Writer interface for a single ORC file + + Parameters + ---------- + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. + Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. 
+ """ + + writer: _orc.ORCWriter + is_open: bool + def __init__( + self, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + def write(self, table: Table) -> None: + """ + Write the table into an ORC file. The schema of the table must + be equal to the schema used when opening the ORC file. + + Parameters + ---------- + table : pyarrow.Table + The table to be written into the ORC file + """ + def close(self) -> None: + """ + Close the ORC file + """ + +def read_table( + source: StrPath | NativeFile | IO, + columns: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Table: + """ + Read a Table from an ORC file. + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name. For file-like objects, + only read a single file. Use pyarrow.BufferReader to read a file + contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. Output always follows the ordering of the file and + not the `columns` list. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + """ + +def write_table( + table: Table, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, +) -> None: + """ + Write a table into an ORC file. + + Parameters + ---------- + table : pyarrow.lib.Table + The table to be written into the ORC file + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. 
+ Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. + """ diff --git a/python/stubs/pandas_compat.pyi b/python/stubs/pandas_compat.pyi new file mode 100644 index 00000000000..efbd05ac2fe --- /dev/null +++ b/python/stubs/pandas_compat.pyi @@ -0,0 +1,54 @@ +from typing import Any, TypedDict, TypeVar + +import numpy as np +import pandas as pd + +from pandas import DatetimeTZDtype + +from .lib import Array, DataType, Schema, Table + +_T = TypeVar("_T") + +def get_logical_type_map() -> dict[int, str]: ... +def get_logical_type(arrow_type: DataType) -> str: ... +def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... +def get_logical_type_from_numpy(pandas_collection) -> str: ... +def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... + +class _ColumnMetadata(TypedDict): + name: str + field_name: str + pandas_type: int + numpy_type: str + metadata: dict | None + +def get_column_metadata( + column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str +) -> _ColumnMetadata: ... +def construct_metadata( + columns_to_convert: list[pd.Series], + df: pd.DataFrame, + column_names: list[str], + index_levels: list[pd.Index], + index_descriptors: list[dict], + preserve_index: bool, + types: list[DataType], + column_field_names: list[str] = ..., +) -> dict[bytes, bytes]: ... +def dataframe_to_types( + df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None +) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... +def dataframe_to_arrays( + df: pd.DataFrame, + schema: Schema, + preserve_index: bool | None, + nthreads: int = 1, + columns: list[str] | None = None, + safe: bool = True, +) -> tuple[Array, Schema, int]: ... +def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... +def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... +def table_to_dataframe( + options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None +) -> pd.DataFrame: ... +def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ... 
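+# NOTE (editorial, illustrative only): a rough sketch of how the typed helpers
+# above line up; pyarrow.pandas_compat is internal API, so the call below is
+# shown purely to illustrate the annotated return types.
+#
+#   import pandas as pd
+#   from pyarrow import pandas_compat
+#
+#   df = pd.DataFrame({"a": [1, 2, 3]})
+#   names, types, metadata = pandas_compat.dataframe_to_types(df, preserve_index=False)
+#   # names: list[str], types: list[DataType], metadata: dict[bytes, bytes]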
diff --git a/python/stubs/parquet/__init__.pyi b/python/stubs/parquet/__init__.pyi new file mode 100644 index 00000000000..4ef88705809 --- /dev/null +++ b/python/stubs/parquet/__init__.pyi @@ -0,0 +1 @@ +from .core import * # noqa diff --git a/python/stubs/parquet/core.pyi b/python/stubs/parquet/core.pyi new file mode 100644 index 00000000000..56b2c8447d9 --- /dev/null +++ b/python/stubs/parquet/core.pyi @@ -0,0 +1,2061 @@ +import sys + +from pathlib import Path + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Callable, Iterator, Literal, Sequence + +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow import _parquet +from pyarrow._compute import Expression +from pyarrow._fs import FileSystem, SupportedFileSystem +from pyarrow._parquet import ( + ColumnChunkMetaData, + ColumnSchema, + FileDecryptionProperties, + FileEncryptionProperties, + FileMetaData, + ParquetLogicalType, + ParquetReader, + ParquetSchema, + RowGroupMetaData, + SortingColumn, + Statistics, +) +from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow.dataset import ParquetFileFragment, Partitioning +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from typing_extensions import deprecated + +__all__ = ( + "ColumnChunkMetaData", + "ColumnSchema", + "FileDecryptionProperties", + "FileEncryptionProperties", + "FileMetaData", + "ParquetDataset", + "ParquetFile", + "ParquetLogicalType", + "ParquetReader", + "ParquetSchema", + "ParquetWriter", + "RowGroupMetaData", + "SortingColumn", + "Statistics", + "read_metadata", + "read_pandas", + "read_schema", + "read_table", + "write_metadata", + "write_table", + "write_to_dataset", + "_filters_to_expression", + "filters_to_expression", +) + +def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: + """ + Check if filters are well-formed and convert to an ``Expression``. + + Parameters + ---------- + filters : List[Tuple] or List[List[Tuple]] + + Notes + ----- + See internal ``pyarrow._DNF_filter_doc`` attribute for more details. + + Examples + -------- + + >>> filters_to_expression([("foo", "==", "bar")]) + + + Returns + ------- + pyarrow.compute.Expression + An Expression representing the filters + """ + +@deprecated("use filters_to_expression") +def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + +_Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] + +class ParquetFile: + """ + Reader interface for a single Parquet file. + + Parameters + ---------- + source : str, pathlib.Path, pyarrow.NativeFile, or file-like object + Readable source. For passing bytes or buffer-like file containing a + Parquet file, use pyarrow.BufferReader. + metadata : FileMetaData, default None + Use existing metadata object, rather than reading from file. + common_metadata : FileMetaData, default None + Will be used in reads for pandas schema metadata if not found in the + main file's metadata, no other uses at the moment. + read_dictionary : list + List of column names to read directly as DictionaryArray. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. 
+ pre_buffer : bool, default False + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties, default None + File decryption properties for Parquet Modular Encryption. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Examples + -------- + + Generate an example PyArrow Table and write it to Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Create a ``ParquetFile`` object from the Parquet file: + + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the data: + + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + + Create a ParquetFile object with "animal" column as DictionaryArray: + + >>> parquet_file = pq.ParquetFile("example.parquet", read_dictionary=["animal"]) + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [ -- dictionary: + ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: + [0,1,2,3,4,5]] + """ + + reader: ParquetReader + common_metadata: FileMetaData + + def __init__( + self, + source: str | Path | NativeFile | IO, + *, + metadata: FileMetaData | None = None, + common_metadata: FileMetaData | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + filesystem: SupportedFileSystem | None = None, + page_checksum_verification: bool = False, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + @property + def metadata(self) -> FileMetaData: + """ + Return the Parquet metadata. 
+ """ + @property + def schema(self) -> ParquetSchema: + """ + Return the Parquet schema, unconverted to Arrow types + """ + @property + def schema_arrow(self) -> Schema: + """ + Return the inferred Arrow schema, converted from the whole Parquet + file's schema + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the Arrow schema: + + >>> parquet_file.schema_arrow + n_legs: int64 + animal: string + """ + @property + def num_row_groups(self) -> int: + """ + Return the number of row groups of the Parquet file. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.num_row_groups + 1 + """ + def close(self, force: bool = False) -> None: ... + @property + def closed(self) -> bool: ... + def read_row_group( + self, + i: int, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a single row group from a Parquet file. + + Parameters + ---------- + i : int + Index of the individual row group that we want to read. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row group as a table (of columns) + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_group(0) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def read_row_groups( + self, + row_groups: list, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a multiple row groups from a Parquet file. + + Parameters + ---------- + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row groups as a table (of columns). 
+ + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_groups([0, 0]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] + """ + def iter_batches( + self, + batch_size: int = 65536, + row_groups: list | None = None, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Iterator[RecordBatch]: + """ + Read streaming batches from a Parquet file. + + Parameters + ---------- + batch_size : int, default 64K + Maximum number of records to yield per batch. Batches may be + smaller if there aren't enough rows in the file. + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : boolean, default True + Perform multi-threaded column reads. + use_pandas_metadata : boolean, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Yields + ------ + pyarrow.RecordBatch + Contents of each batch as a record batch + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + >>> for i in parquet_file.iter_batches(): + ... print("RecordBatch") + ... print(i.to_pandas()) + RecordBatch + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + def read( + self, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a Table from Parquet format. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read a Table: + + >>> parquet_file.read(columns=["animal"]) + pyarrow.Table + animal: string + ---- + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: + """ + Read contents of file for the given columns and batch size. + + Notes + ----- + This function's primary purpose is benchmarking. + The scan is executed on a single thread. + + Parameters + ---------- + columns : list of integers, default None + Select columns to read, if None scan all columns. + batch_size : int, default 64K + Number of rows to read at a time internally. + + Returns + ------- + num_rows : int + Number of rows in file + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.scan_contents() + 6 + """ + +class ParquetWriter: + """ + Class for incrementally building a Parquet file for Arrow tables. + + Parameters + ---------- + where : path or file-like object + schema : pyarrow.Schema + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). + use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This take priority over the coerce_timestamps option. + coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. For ``version='1.0'`` and ``version='2.4'``, + nanoseconds are cast to microseconds ('us'), while for + ``version='2.6'`` (the default), they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. 
+ If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} + allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. + data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. + flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. + use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_stream are + enabled, then dictionary is preferred. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. + column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Can only be used when ``use_dictionary`` is set to False, and + cannot be used in combination with ``use_byte_stream_split``. + Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', + 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files `_. + data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. + use_compliant_nested_type : bool, default True + Whether to write compliant Parquet nested type (lists) as defined + `here `_, defaults to ``True``. 
+ For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + group (LIST) { + repeated group list { + element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + group (LIST) { + repeated group list { + item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read size by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. + store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. 
+ - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + writer_engine_version : unused + **options : dict + If options contains a key `metadata_collector` then the + corresponding value is assumed to be a list (or any object with + `.append` method) that will be filled with the file metadata instance + of the written file. + + Examples + -------- + Generate an example PyArrow Table and RecordBatch: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.record_batch( + ... [ + ... [2, 2, 4, 4, 5, 100], + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... ], + ... names=["n_legs", "animal"], + ... ) + + create a ParquetWriter object: + + >>> import pyarrow.parquet as pq + >>> writer = pq.ParquetWriter("example.parquet", table.schema) + + and write the Table into the Parquet file: + + >>> writer.write_table(table) + >>> writer.close() + + >>> pq.read_table("example.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + create a ParquetWriter object for the RecordBatch: + + >>> writer2 = pq.ParquetWriter("example2.parquet", batch.schema) + + and write the RecordBatch into the Parquet file: + + >>> writer2.write_batch(batch) + >>> writer2.close() + + >>> pq.read_table("example2.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + + flavor: str + schema_changed: bool + schema: ParquetSchema + where: str | Path | IO + file_handler: NativeFile | None + writer: _parquet.ParquetWriter + is_open: bool + + def __init__( + self, + where: str | Path | IO | NativeFile, + schema: Schema, + filesystem: SupportedFileSystem | None = None, + flavor: str | None = None, + version: Literal["1.0", "2.4", "2.6"] = ..., + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool | list = False, + column_encoding: str | dict | None = None, + writer_engine_version=None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **options, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> Literal[False]: ... + def write( + self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None + ) -> None: + """ + Write RecordBatch or Table to the Parquet file. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the input + table or batch length and 1024 * 1024. + """ + def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: + """ + Write RecordBatch to the Parquet file. 
+ + Parameters + ---------- + batch : RecordBatch + row_group_size : int, default None + Maximum number of rows in written row group. If None, the + row group size will be the minimum of the RecordBatch + size and 1024 * 1024. If set larger than 64Mi then 64Mi + will be used instead. + """ + def write_table(self, table: Table, row_group_size: int | None = None) -> None: + """ + Write Table to the Parquet file. + + Parameters + ---------- + table : Table + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the Table size + and 1024 * 1024. If set larger than 64Mi then 64Mi will + be used instead. + + """ + def close(self) -> None: + """ + Close the connection to the Parquet file. + """ + def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: + """ + Add key-value metadata to the file. + This will overwrite any existing metadata with the same key. + + Parameters + ---------- + key_value_metadata : dict + Keys and values must be string-like / coercible to bytes. + """ + +class ParquetDataset: + """ + Encapsulates details of reading a complete Parquet dataset possibly + consisting of multiple files and partitions in subdirectories. + + Parameters + ---------- + path_or_paths : str or List[str] + A directory name, single file name, or list of file names. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : pyarrow.parquet.Schema + Optionally provide the Schema for the Dataset, in which case it will + not be inferred from the source. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') = 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. 
code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. Set to False if you want to prioritize minimal memory usage + over maximum speed. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular resolution + (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 + timestamps will be inferred as timestamps in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file. + + Examples + -------- + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2", partition_cols=["year"]) + + create a ParquetDataset object from the dataset source: + + >>> dataset = pq.ParquetDataset("dataset_v2/") + + and read the data: + + >>> dataset.read().to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + create a ParquetDataset object with filter: + + >>> dataset = pq.ParquetDataset("dataset_v2/", filters=[("n_legs", "=", 4)]) + >>> dataset.read().to_pandas() + n_legs animal year + 0 4 Dog 2021 + 1 4 Horse 2022 + """ + def __init__( + self, + path_or_paths: SingleOrList[str] + | SingleOrList[Path] + | SingleOrList[NativeFile] + | SingleOrList[IO], + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + *, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + def equals(self, other: ParquetDataset) -> bool: ... + @property + def schema(self) -> Schema: + """ + Schema of the Dataset. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_schema", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_schema/") + + Read the schema: + + >>> dataset.schema + n_legs: int64 + animal: string + year: dictionary + """ + def read( + self, + columns: list[str] | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read (multiple) Parquet files as a single pyarrow.Table. + + Parameters + ---------- + columns : List[str] + Names of columns to read from the dataset. The partition fields + are not automatically included. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_read", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_read/") + + Read the dataset: + + >>> dataset.read(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[5],[2],[4,100],[2,4]] + """ + def read_pandas(self, **kwargs) -> Table: + """ + Read dataset including pandas metadata, if any. 
Other arguments passed + through to :func:`read`, see docstring for further details. + + Parameters + ---------- + **kwargs : optional + Additional options for :func:`read` + + Examples + -------- + Generate an example parquet file: + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "table_V2.parquet") + >>> dataset = pq.ParquetDataset("table_V2.parquet") + + Read the dataset with pandas metadata: + + >>> dataset.read_pandas(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,2,4,4,5,100]] + + >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} + """ + @property + def fragments(self) -> list[ParquetFileFragment]: + """ + A list of the Dataset source fragments or pieces with absolute + file paths. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_fragments", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_fragments/") + + List the fragments: + + >>> dataset.fragments + [<pyarrow.dataset.ParquetFileFragment path=dataset_v2_fragments/... + """ + @property + def files(self) -> list[str]: + """ + A list of absolute Parquet file paths in the Dataset source. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_files", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_files/") + + List the files: + + >>> dataset.files + ['dataset_v2_files/year=2019/...-0.parquet', ... + """ + @property + def filesystem(self) -> FileSystem: + """ + The filesystem type of the Dataset source. + """ + @property + def partitioning(self) -> Partitioning: + """ + The partitioning of the Dataset source, if discovered.
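+
+        Examples
+        --------
+        A minimal illustrative sketch (assumes the hive-partitioned
+        ``dataset_v2`` directory written in the class-level examples;
+        the exact repr may differ):
+
+        >>> import pyarrow.parquet as pq  # doctest: +SKIP
+        >>> pq.ParquetDataset("dataset_v2/").partitioning  # doctest: +SKIP
+        <pyarrow.dataset.HivePartitioning ...>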
+ """ + +def read_table( + source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], + *, + columns: list | None = None, + use_threads: bool = True, + schema: Schema | None = None, + use_pandas_metadata: bool = False, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + filesystem: SupportedFileSystem | None = None, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, +) -> Table: + """ + Read a Table from Parquet format + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. 
+ Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') = 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns) + + + Examples + -------- + + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_name_2", partition_cols=["year"]) + + Read the data: + + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + + Read only a subset of columns: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] + + Read a subset of columns and read one column as DictionaryArray: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"], read_dictionary=["animal"]) + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [ -- dictionary: + ["Brittle stars"] -- indices: + [0], -- dictionary: + ["Flamingo"] -- indices: + [0], -- dictionary: + ["Dog","Centipede"] -- indices: + [0,1], -- dictionary: + ["Parrot","Horse"] -- indices: + [0,1]] + + Read the table with filter: + + >>> pq.read_table( + ... "dataset_name_2", columns=["n_legs", "animal"], filters=[("n_legs", "<", 4)] + ... ).to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + + Read data from a single Parquet file: + + >>> pq.write_table(table, "example.parquet") + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + """ + +def read_pandas( + source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs +) -> Table: + """ + + Read a Table from Parquet format, also reading DataFrame + index values if known in the file metadata + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. 
The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + **kwargs + additional options for :func:`read_table` + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') = 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. 
The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Returns + ------- + pyarrow.Table + Content of the file as a Table of Columns, including DataFrame + indexes as columns + """ + +def write_table( + table: Table, + where: str | Path | NativeFile | IO, + row_group_size: int | None = None, + version: Literal["1.0", "2.4", "2.6"] = "2.6", + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + coerce_timestamps: str | None = None, + allow_truncated_timestamps: bool = False, + data_page_size: int | None = None, + flavor: str | None = None, + filesystem: SupportedFileSystem | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool = False, + column_encoding: str | dict | None = None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **kwargs, +) -> None: + """ + + Write a Table to Parquet format. + + Parameters + ---------- + table : pyarrow.Table + where : string or pyarrow.NativeFile + row_group_size : int + Maximum number of rows in each written row group. If None, the + row group size will be the minimum of the Table size and + 1024 * 1024. + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). + use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This take priority over the coerce_timestamps option. 
+ coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. For ``version='1.0'`` and ``version='2.4'``, + nanoseconds are cast to microseconds ('us'), while for + ``version='2.6'`` (the default), they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. + If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} + allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamps=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. + data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. + flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. + use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_split are + enabled, then dictionary is preferred. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. + column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Can only be used when ``use_dictionary`` is set to False, and + cannot be used in combination with ``use_byte_stream_split``. + Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', + 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files <https://arrow.apache.org/docs/cpp/parquet.html#encodings>`_. + data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. + use_compliant_nested_type : bool, default True + Whether to write compliant Parquet nested type (lists) as defined + `here <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types>`_, defaults to ``True``.
+ For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + group (LIST) { + repeated group list { + element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + group (LIST) { + repeated group list { + item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read size by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. + store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. 
+ - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + + **kwargs : optional + Additional options for ParquetWriter + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + and write the Table into Parquet file: + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Defining row group size for the Parquet file: + + >>> pq.write_table(table, "example.parquet", row_group_size=3) + + Defining row group compression (default is Snappy): + + >>> pq.write_table(table, "example.parquet", compression="none") + + Defining row group compression and encoding per-column: + + >>> pq.write_table( + ... table, + ... "example.parquet", + ... compression={"n_legs": "snappy", "animal": "gzip"}, + ... use_dictionary=["n_legs", "animal"], + ... ) + + Defining column encoding per-column: + + >>> pq.write_table( + ... table, "example.parquet", column_encoding={"animal": "PLAIN"}, use_dictionary=False + ... ) + """ + +def write_to_dataset( + table: Table, + root_path: str | Path, + partition_cols: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + partitioning: Partitioning | list[str] | None = None, + basename_template: str | None = None, + use_threads: bool | None = None, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] + | None = None, + **kwargs, +) -> None: + """ + Wrapper around dataset.write_dataset for writing a Table to + Parquet format by partitions. + For each combination of partition columns and values, + a subdirectories are created in the following + manner: + + root_dir/ + group1=value1 + group2=value1 + .parquet + group2=value2 + .parquet + group1=valueN + group2=value1 + .parquet + group2=valueN + .parquet + + Parameters + ---------- + table : pyarrow.Table + root_path : str, pathlib.Path + The root directory of the dataset. + partition_cols : list, + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : Schema, optional + This Schema of the dataset. + partitioning : Partitioning or list[str], optional + The partitioning scheme specified with the + ``pyarrow.dataset.partitioning()`` function or a list of field names. + When providing a list of field names, you can use + ``partitioning_flavor`` to drive which partitioning type should be + used. + basename_template : str, optional + A template string used to generate basenames of written data files. + The token '{i}' will be replaced with an automatically incremented + integer. If not specified, it defaults to "guid-{i}.parquet". + use_threads : bool, default True + Write files in parallel. If enabled, then maximum parallelism will be + used determined by the number of available CPU cores. + file_visitor : function + If set, this function will be called with a WrittenFile instance + for each file created during the call. This object will have both + a path attribute and a metadata attribute. 
+ + The path attribute will be a string containing the path to + the created file. + + The metadata attribute will be the parquet metadata of the file. + This metadata will have the file path attribute set and can be used + to build a _metadata file. The metadata attribute will be None if + the format is not parquet. + + Example visitor which simple collects the filenames created:: + + visited_paths = [] + + def file_visitor(written_file): + visited_paths.append(written_file.path) + + existing_data_behavior : 'overwrite_or_ignore' | 'error' | 'delete_matching' + Controls how the dataset will handle data that already exists in + the destination. The default behaviour is 'overwrite_or_ignore'. + + 'overwrite_or_ignore' will ignore any existing data and will + overwrite files with the same name as an output file. Other + existing files will be ignored. This behavior, in combination + with a unique basename_template for each write, will allow for + an append workflow. + + 'error' will raise an error if any data exists in the destination. + + 'delete_matching' is useful when you are writing a partitioned + dataset. The first time each partition directory is encountered + the entire directory will be deleted. This allows you to overwrite + old partitions completely. + **kwargs : dict, + Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` + function for matching kwargs, and remainder to + :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. + See the docstring of :func:`write_table` and + :func:`pyarrow.dataset.write_dataset` for the available options. + Using `metadata_collector` in kwargs allows one to collect the + file metadata instances of dataset pieces. The file paths in the + ColumnChunkMetaData will be set relative to `root_path`. + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + and write it to a partitioned dataset: + + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_name_3", partition_cols=["year"]) + >>> pq.ParquetDataset("dataset_name_3").files + ['dataset_name_3/year=2019/...-0.parquet', ... + + Write a single Parquet file into the root folder: + + >>> pq.write_to_dataset(table, root_path="dataset_name_4") + >>> pq.ParquetDataset("dataset_name_4/").files + ['dataset_name_4/...-0.parquet'] + """ + +def write_metadata( + schema: Schema, + where: str | NativeFile, + metadata_collector: list[FileMetaData] | None = None, + filesystem: SupportedFileSystem | None = None, + **kwargs, +) -> None: + """ + Write metadata-only Parquet file from schema. This can be used with + `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar + files. + + Parameters + ---------- + schema : pyarrow.Schema + where : string or pyarrow.NativeFile + metadata_collector : list + where to collect metadata information. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + **kwargs : dict, + Additional kwargs for ParquetWriter class. See docstring for + `ParquetWriter` for more information. + + Examples + -------- + Generate example data: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... 
"animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Write a dataset and collect metadata information. + + >>> metadata_collector = [] + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, "dataset_metadata", metadata_collector=metadata_collector) + + Write the `_common_metadata` parquet file without row groups statistics. + + >>> pq.write_metadata(table.schema, "dataset_metadata/_common_metadata") + + Write the `_metadata` parquet file with row groups statistics. + + >>> pq.write_metadata( + ... table.schema, "dataset_metadata/_metadata", metadata_collector=metadata_collector + ... ) + """ + +def read_metadata( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> FileMetaData: + """ + Read FileMetaData from footer of a single Parquet file. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + + Returns + ------- + metadata : FileMetaData + The metadata of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_metadata("example.parquet") + + created_by: parquet-cpp-arrow version ... + num_columns: 2 + num_rows: 3 + num_row_groups: 1 + format_version: 2.6 + serialized_size: ... + """ + +def read_schema( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Schema: + """ + Read effective Arrow schema from Parquet file metadata. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. 
+ + Returns + ------- + schema : pyarrow.Schema + The schema of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_schema("example.parquet") + n_legs: int64 + animal: string + """ diff --git a/python/stubs/parquet/encryption.pyi b/python/stubs/parquet/encryption.pyi new file mode 100644 index 00000000000..5a77dae7ef7 --- /dev/null +++ b/python/stubs/parquet/encryption.pyi @@ -0,0 +1,15 @@ +from pyarrow._parquet_encryption import ( + CryptoFactory, + DecryptionConfiguration, + EncryptionConfiguration, + KmsClient, + KmsConnectionConfig, +) + +__all__ = [ + "CryptoFactory", + "DecryptionConfiguration", + "EncryptionConfiguration", + "KmsClient", + "KmsConnectionConfig", +] diff --git a/python/stubs/substrait.pyi b/python/stubs/substrait.pyi new file mode 100644 index 00000000000..a56a8a5b40f --- /dev/null +++ b/python/stubs/substrait.pyi @@ -0,0 +1,21 @@ +from pyarrow._substrait import ( + BoundExpressions, + SubstraitSchema, + deserialize_expressions, + deserialize_schema, + get_supported_functions, + run_query, + serialize_expressions, + serialize_schema, +) + +__all__ = [ + "BoundExpressions", + "get_supported_functions", + "run_query", + "deserialize_expressions", + "serialize_expressions", + "deserialize_schema", + "serialize_schema", + "SubstraitSchema", +] diff --git a/python/stubs/types.pyi b/python/stubs/types.pyi new file mode 100644 index 00000000000..0cb4f6171d3 --- /dev/null +++ b/python/stubs/types.pyi @@ -0,0 +1,194 @@ +import sys + +from typing import Any + +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow.lib import ( + BinaryType, + BinaryViewType, + BoolType, + DataType, + Date32Type, + Date64Type, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + DenseUnionType, + DictionaryType, + DurationType, + FixedSizeBinaryType, + FixedSizeListType, + Float16Type, + Float32Type, + Float64Type, + Int8Type, + Int16Type, + Int32Type, + Int64Type, + LargeBinaryType, + LargeListType, + LargeListViewType, + LargeStringType, + ListType, + ListViewType, + MapType, + MonthDayNanoIntervalType, + NullType, + RunEndEncodedType, + SparseUnionType, + StringType, + StringViewType, + StructType, + Time32Type, + Time64Type, + TimestampType, + UInt8Type, + UInt16Type, + Uint32Type, + UInt64Type, +) + +_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type +_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | Uint32Type | UInt64Type +_Integer: TypeAlias = _SignedInteger | _UnsignedInteger +_Floating: TypeAlias = Float16Type | Float32Type | Float64Type +_Decimal: TypeAlias = ( + Decimal32Type[Any, Any] + | Decimal64Type[Any, Any] + | Decimal128Type[Any, Any] + | Decimal256Type[Any, Any] +) +_Date: TypeAlias = Date32Type | Date64Type +_Time: TypeAlias = Time32Type[Any] | Time64Type[Any] +_Interval: TypeAlias = MonthDayNanoIntervalType +_Temporal: TypeAlias = TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval +_Union: TypeAlias = SparseUnionType | DenseUnionType +_Nested: TypeAlias = ( + ListType[Any] + | FixedSizeListType[Any, Any] + | LargeListType[Any] + | ListViewType[Any] + | LargeListViewType[Any] + | StructType + | MapType[Any, Any, Any] + | 
_Union +) + +def is_null(t: DataType) -> TypeIs[NullType]: ... +def is_boolean(t: DataType) -> TypeIs[BoolType]: ... +def is_integer(t: DataType) -> TypeIs[_Integer]: ... +def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ... +def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ... +def is_int8(t: DataType) -> TypeIs[Int8Type]: ... +def is_int16(t: DataType) -> TypeIs[Int16Type]: ... +def is_int32(t: DataType) -> TypeIs[Int32Type]: ... +def is_int64(t: DataType) -> TypeIs[Int64Type]: ... +def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ... +def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ... +def is_uint32(t: DataType) -> TypeIs[Uint32Type]: ... +def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ... +def is_floating(t: DataType) -> TypeIs[_Floating]: ... +def is_float16(t: DataType) -> TypeIs[Float16Type]: ... +def is_float32(t: DataType) -> TypeIs[Float32Type]: ... +def is_float64(t: DataType) -> TypeIs[Float64Type]: ... +def is_list(t: DataType) -> TypeIs[ListType[Any]]: ... +def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ... +def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ... +def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ... +def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ... +def is_struct(t: DataType) -> TypeIs[StructType]: ... +def is_union(t: DataType) -> TypeIs[_Union]: ... +def is_nested(t: DataType) -> TypeIs[_Nested]: ... +def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ... +def is_temporal(t: DataType) -> TypeIs[_Temporal]: ... +def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ... +def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ... +def is_time(t: DataType) -> TypeIs[_Time]: ... +def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ... +def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ... +def is_binary(t: DataType) -> TypeIs[BinaryType]: ... +def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ... +def is_unicode(t: DataType) -> TypeIs[StringType]: ... +def is_string(t: DataType) -> TypeIs[StringType]: ... +def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ... +def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ... +def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ... +def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ... +def is_string_view(t: DataType) -> TypeIs[StringViewType]: ... +def is_date(t: DataType) -> TypeIs[_Date]: ... +def is_date32(t: DataType) -> TypeIs[Date32Type]: ... +def is_date64(t: DataType) -> TypeIs[Date64Type]: ... +def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ... +def is_decimal(t: DataType) -> TypeIs[_Decimal]: ... +def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ... +def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ... +def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ... +def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ... +def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ... +def is_interval(t: DataType) -> TypeIs[_Interval]: ... +def is_primitive(t: DataType) -> bool: ... 
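+
+# Illustrative sketch only (not part of the stub API): because the predicates
+# above return ``TypeIs[...]``, a static type checker narrows the argument in
+# the guarded branch. Uses only public pyarrow names::
+#
+#     import pyarrow as pa
+#     import pyarrow.types as pa_types
+#
+#     def describe(t: pa.DataType) -> str:
+#         if pa_types.is_timestamp(t):
+#             # ``t`` is narrowed to TimestampType here, so ``.unit`` and
+#             # ``.tz`` type-check.
+#             return f"timestamp[{t.unit}, tz={t.tz}]"
+#         return str(t)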
+ +__all__ = [ + "is_binary", + "is_binary_view", + "is_boolean", + "is_date", + "is_date32", + "is_date64", + "is_decimal", + "is_decimal128", + "is_decimal256", + "is_decimal32", + "is_decimal64", + "is_dictionary", + "is_duration", + "is_fixed_size_binary", + "is_fixed_size_list", + "is_float16", + "is_float32", + "is_float64", + "is_floating", + "is_int16", + "is_int32", + "is_int64", + "is_int8", + "is_integer", + "is_interval", + "is_large_binary", + "is_large_list", + "is_large_list_view", + "is_large_string", + "is_large_unicode", + "is_list", + "is_list_view", + "is_map", + "is_nested", + "is_null", + "is_primitive", + "is_run_end_encoded", + "is_signed_integer", + "is_string", + "is_string_view", + "is_struct", + "is_temporal", + "is_time", + "is_time32", + "is_time64", + "is_timestamp", + "is_uint16", + "is_uint32", + "is_uint64", + "is_uint8", + "is_unicode", + "is_union", + "is_unsigned_integer", +] diff --git a/python/stubs/util.pyi b/python/stubs/util.pyi new file mode 100644 index 00000000000..c2ecf7d6b61 --- /dev/null +++ b/python/stubs/util.pyi @@ -0,0 +1,27 @@ +from collections.abc import Callable +from os import PathLike +from typing import Any, Protocol, Sequence, TypeVar + +_F = TypeVar("_F", bound=Callable) +_N = TypeVar("_N") + +class _DocStringComponents(Protocol): + _docstring_components: list[str] + +def doc( + *docstrings: str | _DocStringComponents | Callable | None, **params: Any +) -> Callable[[_F], _F]: ... +def _is_iterable(obj) -> bool: ... +def _is_path_like(path) -> bool: ... +def _stringify_path(path: str | PathLike) -> str: ... +def product(seq: Sequence[_N]) -> _N: ... +def get_contiguous_span( + shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int +) -> tuple[int, int]: ... +def find_free_port() -> int: ... +def guid() -> str: ... +def _download_urllib(url, out_path) -> None: ... +def _download_requests(url, out_path) -> None: ... +def download_tzdata_on_windows() -> None: ... +def _deprecate_api(old_name, new_name, api, next_version, type=...): ... +def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ... From 70807bbb5547573bf95f760619f80c2ed0785c79 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 19:30:47 +0200 Subject: [PATCH 02/32] GH-7: [Python] Fix invalid-context-manager error (#30) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix invalid-context-manager --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/tests/test_flight.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index e9e99d8eb83..42de960ac04 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -49,8 +49,14 @@ ClientMiddleware, ClientMiddlewareFactory, ) except ImportError: + class context_like(object): + def __enter__(self): + return self + def __exit__(self, exc_type, exc_value, traceback): + pass + flight = None - FlightClient, FlightServerBase = object, object + FlightClient, FlightServerBase = context_like, context_like ServerAuthHandler, ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object ClientMiddleware, ClientMiddlewareFactory = object, object From e69c7f54656db27035f43927773abd8eb70f4e08 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 19:38:10 +0200 Subject: [PATCH 03/32] GH-8: [Python] Fix invalid-type-form (#31) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix invalid-type-form errors --------- Co-authored-by: Patrick J. Roddy --- python/stubs/__lib_pxi/array.pyi | 4 ++-- python/stubs/__lib_pxi/io.pyi | 3 ++- python/stubs/__lib_pxi/table.pyi | 3 ++- python/stubs/compute.pyi | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/stubs/__lib_pxi/array.pyi b/python/stubs/__lib_pxi/array.pyi index ec1cda30a88..17eb4c6d888 100644 --- a/python/stubs/__lib_pxi/array.pyi +++ b/python/stubs/__lib_pxi/array.pyi @@ -1,3 +1,4 @@ +import builtins import datetime as dt import sys @@ -1990,8 +1991,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): @overload def __getitem__(self, key: int) -> _Scalar_co: ... @overload - def __getitem__(self, key: slice) -> Self: ... - def __getitem__(self, key): + def __getitem__(self, key: builtins.slice) -> Self: ... """ Slice or return value at given index diff --git a/python/stubs/__lib_pxi/io.pyi b/python/stubs/__lib_pxi/io.pyi index d882fd79d57..37c8aefb06b 100644 --- a/python/stubs/__lib_pxi/io.pyi +++ b/python/stubs/__lib_pxi/io.pyi @@ -1,3 +1,4 @@ +import builtins import sys from collections.abc import Callable @@ -578,7 +579,7 @@ class Buffer(_Weakrefable): @property def parent(self) -> Buffer | None: ... @overload - def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key: builtins.slice) -> Self: ... @overload def __getitem__(self, key: int) -> int: ... 
def slice(self, offset: int = 0, length: int | None = None) -> Self: diff --git a/python/stubs/__lib_pxi/table.pyi b/python/stubs/__lib_pxi/table.pyi index ad9d0392137..ad34e9b6dff 100644 --- a/python/stubs/__lib_pxi/table.pyi +++ b/python/stubs/__lib_pxi/table.pyi @@ -1,3 +1,4 @@ +import builtins import datetime as dt import sys @@ -294,7 +295,7 @@ class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): """ def __sizeof__(self) -> int: ... @overload - def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key: builtins.slice) -> Self: ... @overload def __getitem__(self, key: int) -> _Scalar_co: ... def __getitem__(self, key): diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi index 8d8fc35b134..f9039731ee6 100644 --- a/python/stubs/compute.pyi +++ b/python/stubs/compute.pyi @@ -93,6 +93,7 @@ from . import lib _P = ParamSpec("_P") _R = TypeVar("_R") +_CallableType = Callable[_P, _R] def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: """Reference a column of the dataset. @@ -156,7 +157,7 @@ def scalar(value: bool | float | str) -> Expression: An Expression representing the scalar value """ -def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ... +def _clone_signature(f: _CallableType) -> _CallableType: ... # ============= compute functions ============= _DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType) From 8298c76f2a9c4b8e74407ba2f32d8cdd0f981943 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 19:41:32 +0200 Subject: [PATCH 04/32] GH-9: [Python] Fix non-subscriptable error (#32) * fix: The type parameter of array should be covariant (#253) * release 20.0.0.20250716 (#254) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix non-subscriptable issues --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/pandas_compat.py | 4 ++-- python/pyarrow/tests/test_cuda_numba_interop.py | 12 ++++++------ python/pyarrow/tests/test_gdb.py | 2 +- python/stubs/__lib_pxi/array.pyi | 1 + 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 5e2ee49437e..bb54c3b22c3 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -755,8 +755,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= # create ExtensionBlock arr = item['py_array'] assert len(placement) == 1 - name = columns[placement[0]] - pandas_dtype = extension_columns[name] + name = columns.get(placement[0], None) + pandas_dtype = extension_columns.get(name, None) if not hasattr(pandas_dtype, '__from_arrow__'): raise ValueError("This column does not support to be converted " "to a pandas ExtensionArray") diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py index 876f3c7f761..3bd81d755f5 100644 --- a/python/pyarrow/tests/test_cuda_numba_interop.py +++ b/python/pyarrow/tests/test_cuda_numba_interop.py @@ -49,7 +49,7 @@ def teardown_module(module): @pytest.mark.parametrize("c", range(len(context_choice_ids)), ids=context_choice_ids) def test_context(c): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) assert ctx.handle == nb_ctx.handle.value assert ctx.handle == ctx.to_numba().handle.value ctx2 = cuda.Context.from_numba(nb_ctx) @@ -83,7 +83,7 @@ def make_random_buffer(size, target='host', dtype='uint8', ctx=None): @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) @pytest.mark.parametrize("size", [0, 1, 8, 1000]) def test_from_object(c, dtype, size): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx) # Creating device buffer from numba DeviceNDArray: @@ -161,7 +161,7 @@ def __cuda_array_interface__(self): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_numba_memalloc(c, dtype): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) dtype = np.dtype(dtype) # Allocate memory using numba context # Warning: this will not be reflected in pyarrow context manager @@ -184,7 +184,7 @@ def test_numba_memalloc(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_pyarrow_memalloc(c, dtype): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) size = 10 arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx) @@ -198,7 +198,7 @@ def test_pyarrow_memalloc(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_numba_context(c, dtype): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) size = 10 with nb_cuda.gpus[0]: arr, cbuf = make_random_buffer(size, target='device', @@ -217,7 +217,7 @@ def test_numba_context(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_pyarrow_jit(c, dtype): - ctx, nb_ctx = context_choices[c] + ctx, nb_ctx = context_choices.get(c, (None, None)) @nb_cuda.jit def increment_by_one(an_array): diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 912953ae60d..58aabb7368e 100644 --- a/python/pyarrow/tests/test_gdb.py +++ 
b/python/pyarrow/tests/test_gdb.py @@ -159,7 +159,7 @@ def select_frame(self, func_name): if m is None: pytest.fail(f"Could not select frame for function {func_name}") - frame_num = int(m[1]) + frame_num = int(m.get(1, None)) out = self.run_command(f"frame {frame_num}") assert f"in {func_name}" in out diff --git a/python/stubs/__lib_pxi/array.pyi b/python/stubs/__lib_pxi/array.pyi index 17eb4c6d888..ffdb8a9c075 100644 --- a/python/stubs/__lib_pxi/array.pyi +++ b/python/stubs/__lib_pxi/array.pyi @@ -1992,6 +1992,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): def __getitem__(self, key: int) -> _Scalar_co: ... @overload def __getitem__(self, key: builtins.slice) -> Self: ... + def __getitem__(self, key): """ Slice or return value at given index From 42c73d5f4b47253c422f5bef3926ded7721fed2d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 19:55:29 +0200 Subject: [PATCH 05/32] GH-11: [Python] Fix no-matching-overload error (#34) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix no-matching-overload --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/tests/test_array.py | 3 ++- python/pyarrow/tests/test_compute.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 009ab1e849b..9a5044ce394 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -550,7 +550,8 @@ def test_arange(): for case in cases: result = pa.arange(*case) result.validate(full=True) - assert result.equals(pa.array(list(range(*case)), type=pa.int64())) + + assert result.equals(pa.array(list(range(*case)), type=pa.int64())) # type: ignore[no-matching-overload] # Validate memory_pool keyword argument result = pa.arange(-1, 101, memory_pool=pa.default_memory_pool()) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ad61dbc48a7..97f694df1fc 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1737,10 +1737,10 @@ def test_arithmetic_multiply(): @pytest.mark.parametrize("ty", ["round", "round_to_multiple"]) def test_round_to_integer(ty): if ty == "round": - round = pc.round + round_func = pc.round RoundOptions = partial(pc.RoundOptions, ndigits=0) elif ty == "round_to_multiple": - round = pc.round_to_multiple + round_func = pc.round_to_multiple RoundOptions = partial(pc.RoundToMultipleOptions, multiple=1) values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None] @@ -1758,7 +1758,7 @@ def test_round_to_integer(ty): } for round_mode, expected in rmode_and_expected.items(): options = RoundOptions(round_mode=round_mode) - result = round(values, options=options) + result = round_func(values, options=options) expected_array = pa.array(expected, type=pa.float64()) assert expected_array.equals(result) From 053fdbdbbde29a33bae3036605e8c1e79d9590cd Mon Sep 17 00:00:00 2001 From: "Patrick J. Roddy" Date: Thu, 24 Jul 2025 18:58:20 +0100 Subject: [PATCH 06/32] GH-12: [Python] Fix `invalid-return-type` error (#25) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. 
* Fix `invalid-return-type` error * Fix linting --------- Co-authored-by: Rok Mihevc --- python/pyarrow/interchange/column.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/interchange/column.py b/python/pyarrow/interchange/column.py index ddbceabcb00..f80c586ff95 100644 --- a/python/pyarrow/interchange/column.py +++ b/python/pyarrow/interchange/column.py @@ -20,7 +20,6 @@ import enum from typing import ( Any, - Dict, Iterable, Optional, Tuple, @@ -379,7 +378,7 @@ def describe_null(self) -> Tuple[ColumnNullType, Any]: return ColumnNullType.USE_BITMASK, 0 @property - def null_count(self) -> int: + def null_count(self) -> int | None: """ Number of null elements, if known. @@ -390,7 +389,7 @@ def null_count(self) -> int: return n @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> None: """ The metadata for the column. See `DataFrame.metadata` for more details. """ @@ -466,7 +465,7 @@ def get_buffers(self) -> ColumnBuffers: def _get_data_buffer( self, - ) -> Tuple[_PyArrowBuffer, Any]: # Any is for self.dtype tuple + ) -> Tuple[_PyArrowBuffer, Any] | None: # Any is for self.dtype tuple """ Return the buffer containing the data and the buffer's associated dtype. @@ -505,7 +504,7 @@ def _get_validity_buffer(self) -> Tuple[_PyArrowBuffer, Any]: "There are no missing values so " "does not have a separate mask") - def _get_offsets_buffer(self) -> Tuple[_PyArrowBuffer, Any]: + def _get_offsets_buffer(self) -> Tuple[_PyArrowBuffer, Any] | None: """ Return the buffer containing the offset values for variable-size binary data (e.g., variable-length strings) and the buffer's associated dtype. From 298aac092d950cec7603cac3faf25e33b39a943f Mon Sep 17 00:00:00 2001 From: "Patrick J. Roddy" Date: Thu, 24 Jul 2025 19:05:54 +0100 Subject: [PATCH 07/32] GH-14: [Python] Fix `not-iterable` typing (#26) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. 
* Fix `not-iterable` error --------- Co-authored-by: Rok Mihevc --- python/pyarrow/interchange/column.py | 8 +++++++- python/pyarrow/tests/test_feather.py | 11 ++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/interchange/column.py b/python/pyarrow/interchange/column.py index f80c586ff95..2ecc690b80e 100644 --- a/python/pyarrow/interchange/column.py +++ b/python/pyarrow/interchange/column.py @@ -313,7 +313,13 @@ def _dtype_from_arrowdtype( kind = DtypeKind.CATEGORICAL arr = self._col indices_dtype = arr.indices.type - _, f_string = _PYARROW_KINDS.get(indices_dtype) + mapping = _PYARROW_KINDS.get(indices_dtype) + if mapping is None: + raise ValueError( + f"Dictionary index data type {indices_dtype} " + "not supported by interchange protocol" + ) + _, f_string = mapping return kind, bit_width, f_string, Endianness.NATIVE else: kind, f_string = _PYARROW_KINDS.get(dtype, (None, None)) diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 054bf920b26..d2b59fddebb 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -72,11 +72,12 @@ def setup_module(module): def teardown_module(module): - for path in TEST_FILES: - try: - os.remove(path) - except os.error: - pass + if TEST_FILES is not None: + for path in TEST_FILES: + try: + os.remove(path) + except os.error: + pass @pytest.mark.pandas From 778e77bfcca8ed59e5e5c195a5ec4d6984b87038 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:08:50 +0200 Subject: [PATCH 08/32] GH-15: [Python] Fix possibly-unbound-attribute (#38) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix possibly-unbound-attribute --------- Co-authored-by: Patrick J. 
Roddy --- python/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index 88119e2d2aa..504c78d61bb 100755 --- a/python/setup.py +++ b/python/setup.py @@ -44,7 +44,7 @@ # as here it may be set to the host not target platform is_emscripten = ( sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 + and sysconfig.get_config_var("SOABI").find("emscripten") != -1 # type: ignore[possibly-unbound] ) @@ -254,7 +254,7 @@ def _run_cmake(self): if os.path.isfile('CMakeCache.txt'): cachefile = open('CMakeCache.txt', 'r') cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) + cachefile.read()).group(1) # type: ignore[possibly-unbound-attribute] cachefile.close() if (cachedir != build_temp): build_base = pjoin(saved_cwd, build_cmd.build_base) From 004697b626fe18447fb35fc91bc5f730b32d3083 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:11:14 +0200 Subject: [PATCH 09/32] GH-16: [Python] Fix too-many-positional-arguments (#35) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix too-many-positional-arguments --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/tests/test_flight.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 42de960ac04..91a5aa865db 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -56,7 +56,15 @@ def __exit__(self, exc_type, exc_value, traceback): pass flight = None - FlightClient, FlightServerBase = context_like, context_like + class MockContextManager: + def __enter__(self): + return self + def __exit__(self, exc_type, exc_val, exc_tb): + pass + class FlightServerBase(MockContextManager): + pass + class FlightClient(MockContextManager): + pass ServerAuthHandler, ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object ClientMiddleware, ClientMiddlewareFactory = object, object From f31bab8d039ae0364fec89ed2f74cb6f5d7ee9b4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:13:44 +0200 Subject: [PATCH 10/32] GH-17: [Python] Fix unknown-argument (#36) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix unknown-argument --------- Co-authored-by: Patrick J. Roddy --- python/pyarrow/tests/test_flight.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 91a5aa865db..a5746b462e2 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -57,6 +57,8 @@ def __exit__(self, exc_type, exc_value, traceback): flight = None class MockContextManager: + def __init__(self, *args, **kwargs): + pass def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): From 6ab7643154bf7c8294f69412b0d058cb689631ba Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:20:31 +0200 Subject: [PATCH 11/32] GH-18: [Python] Fix unresolved-attribute (#37) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. 
* Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix unresolved-attribute --------- Co-authored-by: Patrick J. Roddy --- python/pyarrow/cffi.py | 4 +-- python/pyarrow/pandas_compat.py | 8 +++--- python/pyarrow/parquet/core.py | 4 +-- python/pyarrow/tests/parquet/test_pandas.py | 4 +-- .../tests/parquet/test_parquet_file.py | 2 +- python/pyarrow/tests/test_cython.py | 4 +-- python/pyarrow/tests/test_extension_type.py | 4 +-- python/pyarrow/tests/test_flight.py | 25 ++++++++++++----- python/pyarrow/tests/test_json.py | 4 +-- python/pyarrow/tests/test_jvm.py | 12 ++++----- python/pyarrow/tests/test_pandas.py | 27 ++++++++++--------- python/pyarrow/vendored/docscrape.py | 6 ++++- python/scripts/run_emscripten_tests.py | 2 +- python/setup.py | 2 +- python/stubs/__lib_pxi/pandas_shim.pyi | 22 +++++++-------- python/stubs/cffi.pyi | 4 +-- 16 files changed, 76 insertions(+), 58 deletions(-) diff --git a/python/pyarrow/cffi.py b/python/pyarrow/cffi.py index 1da1a916914..3f5e748daf4 100644 --- a/python/pyarrow/cffi.py +++ b/python/pyarrow/cffi.py @@ -17,7 +17,7 @@ from __future__ import absolute_import -import cffi +from cffi import FFI c_source = """ struct ArrowSchema { @@ -77,5 +77,5 @@ """ # TODO use out-of-line mode for faster import and avoid C parsing -ffi = cffi.FFI() +ffi = FFI() ffi.cdef(c_source) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index bb54c3b22c3..5a5e7b10f28 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -25,7 +25,7 @@ from copy import deepcopy import decimal from itertools import zip_longest -import json +from json import dumps as json_dumps import operator import re import warnings @@ -276,7 +276,7 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels, index_descriptors = index_column_metadata = column_indexes = [] return { - b'pandas': json.dumps({ + b'pandas': json_dumps({ 'index_columns': index_descriptors, 'column_indexes': column_indexes, 'columns': column_metadata + index_column_metadata, @@ -511,7 +511,7 @@ def _get_index_level(df, name): def _level_name(name): # preserve type when default serializable, otherwise str it try: - json.dumps(name) + json_dumps(name) return name except TypeError: return str(name) @@ -826,7 +826,7 @@ def table_to_dataframe( axes = [columns, index] mgr = BlockManager(blocks, axes) if _pandas_api.is_ge_v21(): - df = DataFrame._from_mgr(mgr, mgr.axes) + df = DataFrame._from_mgr(mgr, mgr.axes) # type: ignore[unresolved-attribute] else: df = DataFrame(mgr) return df diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index aaf15c20288..8c1a2ae7822 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -21,7 +21,7 @@ from functools import reduce import inspect -import json +from json import loads as json_loads import os import re import operator @@ -1192,7 +1192,7 @@ def add_key_value_metadata(self, key_value_metadata): def _get_pandas_index_columns(keyvalues): - return (json.loads(keyvalues[b'pandas'].decode('utf8')) + return (json_loads(keyvalues[b'pandas'].decode('utf8')) ['index_columns']) diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py 
index 703232b7cac..7f647883561 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -16,7 +16,7 @@ # under the License. import io -import json +from json import loads as json_loads try: import numpy as np @@ -65,7 +65,7 @@ def test_pandas_parquet_custom_metadata(tempdir): metadata = pq.read_metadata(filename).metadata assert b'pandas' in metadata - js = json.loads(metadata[b'pandas'].decode('utf8')) + js = json_loads(metadata[b'pandas'].decode('utf8')) assert js['index_columns'] == [{'kind': 'range', 'name': None, 'start': 0, 'stop': 10000, diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 24ffe612ef7..aef0954eacd 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -408,7 +408,7 @@ def test_parquet_file_hugginface_support(): pytest.skip("fsspec is not installed, skipping Hugging Face test") fake_hf_module = types.ModuleType("huggingface_hub") - fake_hf_module.HfFileSystem = MemoryFileSystem + fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[unresolved-attribute] with mock.patch.dict("sys.modules", {"huggingface_hub": fake_hf_module}): uri = "hf://datasets/apache/arrow/test.parquet" table = pa.table({"a": range(10)}) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index e0116a4bb76..fdacb16be29 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -191,7 +191,7 @@ def test_visit_strings(tmpdir): strings = ['a', 'b', 'c'] visited = [] - mod._visit_strings(strings, visited.append) + mod._visit_strings(strings, visited.append) # type: ignore[unresolved-attribute] assert visited == strings @@ -200,4 +200,4 @@ def raise_on_b(s): if s == 'b': raise ValueError('wtf') - mod._visit_strings(strings, raise_on_b) + mod._visit_strings(strings, raise_on_b) # type: ignore[unresolved-attribute] diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index ebac37e862b..ea1c0afd7ff 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1353,11 +1353,11 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') - uuid_type = mod._make_uuid_type() + uuid_type = mod._make_uuid_type() # type: ignore[unresolved-attribute] assert uuid_type.extension_name == "example-uuid" assert uuid_type.storage_type == pa.binary(16) - array = mod._make_uuid_array() + array = mod._make_uuid_array() # type: ignore[unresolved-attribute] assert array.type == uuid_type assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index a5746b462e2..a3364ef05b8 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -26,7 +26,8 @@ import threading import time import traceback -import json +from json import dumps as json_dumps +from json import dumps as json_loads from datetime import datetime try: @@ -64,9 +65,19 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): pass class FlightServerBase(MockContextManager): - pass + def serve(self): + pass class FlightClient(MockContextManager): - pass + def get_flight_info(self, **kwargs): + pass + def do_action(self, **kwargs): + pass + def do_get(self, **kwargs): 
+ pass + def do_put(self, **kwargs): + pass + def close(self): + pass ServerAuthHandler, ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object ClientMiddleware, ClientMiddlewareFactory = object, object @@ -332,7 +343,7 @@ class InvalidStreamFlightServer(FlightServerBase): def do_get(self, context, ticket): data1 = [pa.array([-10, -5, 0, 5, 10], type=pa.int32())] data2 = [pa.array([-10.0, -5.0, 0.0, 5.0, 10.0], type=pa.float64())] - assert data1.type != data2.type + assert data1[0].type != data2[0].type table1 = pa.Table.from_arrays(data1, names=['a']) table2 = pa.Table.from_arrays(data2, names=['a']) assert table1.schema == self.schema @@ -1759,7 +1770,7 @@ def test_flight_do_put_limit(): with pytest.raises(flight.FlightWriteSizeExceededError, match="exceeded soft limit") as excinfo: writer.write_batch(large_batch) - assert excinfo.value.limit == 4096 + assert excinfo.value.limit == 4096 # type: ignore[unresolved-attribute] smaller_batches = [ large_batch.slice(0, 384), large_batch.slice(384), @@ -2373,7 +2384,7 @@ class ActionNoneFlightServer(EchoFlightServer): def do_action(self, context, action): if action.type == "get_value": - return [json.dumps(self.VALUES).encode('utf-8')] + return [json_dumps(self.VALUES).encode('utf-8')] elif action.type == "append": self.VALUES.append(True) return None @@ -2390,7 +2401,7 @@ def test_none_action_side_effect(): FlightClient(('localhost', server.port)) as client: client.do_action(flight.Action("append", b"")) r = client.do_action(flight.Action("get_value", b"")) - assert json.loads(next(r).body.to_pybytes()) == [True] + assert json_loads(next(r).body.to_pybytes()) == [True] @pytest.mark.slow # Takes a while for gRPC to "realize" writes fail diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index c3f9fe333bd..68ac40063c9 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -20,7 +20,7 @@ from decimal import Decimal import io import itertools -import json +from json import dumps as json_dumps import string import unittest @@ -49,7 +49,7 @@ def make_random_json(num_cols=2, num_rows=10, linesep='\r\n'): lines = [] for row in arr.T: json_obj = OrderedDict([(k, int(v)) for (k, v) in zip(col_names, row)]) - lines.append(json.dumps(json_obj)) + lines.append(json_dumps(json_obj)) data = linesep.join(lines).encode() columns = [pa.array(col, type=pa.int64()) for col in arr] expected = pa.Table.from_arrays(columns, col_names) diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index d2ba780efc7..d71380b8666 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import json +from json import dumps as json_dumps import os import pyarrow as pa import pyarrow.jvm as pa_jvm @@ -175,23 +175,23 @@ def test_jvm_types(root_allocator, pa_type, jvm_spec, nullable): # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json.dumps(spec)) + jvm_field = _jvm_field(json_dumps(spec)) result = pa_jvm.field(jvm_field) expected_field = pa.field('field_name', pa_type, nullable=nullable) assert result == expected_field - jvm_schema = _jvm_schema(json.dumps(spec)) + jvm_schema = _jvm_schema(json_dumps(spec)) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field]) # Schema with custom metadata - jvm_schema = _jvm_schema(json.dumps(spec), {'meta': 'data'}) + jvm_schema = _jvm_schema(json_dumps(spec), {'meta': 'data'}) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field], {'meta': 'data'}) # Schema with custom field metadata spec['metadata'] = [{'key': 'field meta', 'value': 'field data'}] - jvm_schema = _jvm_schema(json.dumps(spec)) + jvm_schema = _jvm_schema(json_dumps(spec)) result = pa_jvm.schema(jvm_schema) expected_field = expected_field.with_metadata( {'field meta': 'field data'}) @@ -379,7 +379,7 @@ def test_jvm_record_batch(root_allocator, pa_type, py_data, jvm_type, # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json.dumps(spec)) + jvm_field = _jvm_field(json_dumps(spec)) # Create VectorSchemaRoot jvm_fields = jpype.JClass('java.util.ArrayList')() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index ceea2527da0..f0bc4a31f34 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -17,7 +17,7 @@ import gc import decimal -import json +from json import dumps as json_dumps import multiprocessing as mp import sys import warnings @@ -3264,7 +3264,8 @@ def test_error_sparse(self): df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])}) except AttributeError: # pandas.arrays module introduced in pandas 0.24 - df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])}) + from pandas import SparseArray + df = pd.DataFrame({'a': SparseArray([1, np.nan, 3])}) with pytest.raises(TypeError, match="Sparse pandas data"): pa.Table.from_pandas(df) @@ -4422,11 +4423,12 @@ def test_convert_to_extension_array(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method if Version(pd.__version__) < Version("1.3.0.dev"): + from pandas.core import integer monkeypatch.delattr( - pd.core.arrays.integer._IntegerDtype, "__from_arrow__") + integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: ignore[unresolved-attribute] # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() assert len(_get_mgr(result).blocks) == 1 @@ -4467,11 +4469,12 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method # (remove the version added above and the actual version for recent pandas) if Version(pd.__version__) < Version("1.3.0.dev"): + from pandas.core import integer monkeypatch.delattr( - pd.core.arrays.integer._IntegerDtype, "__from_arrow__") + integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: 
ignore[unresolved-attribute] result = arr.to_pandas() assert _get_mgr(result).blocks[0].values.dtype == np.dtype("int64") @@ -4650,7 +4653,7 @@ def test_metadata_compat_range_index_pre_0_12(): t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', 'qux']) t1 = t1.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': ['qux'], 'column_indexes': [{'name': None, 'field_name': None, @@ -4679,7 +4682,7 @@ def test_metadata_compat_range_index_pre_0_12(): t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['qux', gen_name_0]) t2 = t2.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4708,7 +4711,7 @@ def test_metadata_compat_range_index_pre_0_12(): t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', gen_name_0]) t3 = t3.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4737,7 +4740,7 @@ def test_metadata_compat_range_index_pre_0_12(): t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', 'qux', gen_name_1]) t4 = t4.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': ['qux', gen_name_1], 'column_indexes': [{'name': None, 'field_name': None, @@ -4771,7 +4774,7 @@ def test_metadata_compat_range_index_pre_0_12(): t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', gen_name_0, gen_name_1]) t5 = t5.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0, gen_name_1], 'column_indexes': [{'name': None, 'field_name': None, @@ -4818,7 +4821,7 @@ def test_metadata_compat_missing_field_name(): # metadata generated by fastparquet 0.3.2 with missing field_names table = table.replace_schema_metadata({ - b'pandas': json.dumps({ + b'pandas': json_dumps({ 'column_indexes': [ {'field_name': None, 'metadata': None, diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py index 6c4d6e01400..096ef245243 100644 --- a/python/pyarrow/vendored/docscrape.py +++ b/python/pyarrow/vendored/docscrape.py @@ -105,6 +105,10 @@ def is_empty(self): class ParseError(Exception): + def __init__(self, *args, docstring=None, **kwargs): + self.__init__(*args, **kwargs) + self.docstring = docstring + def __str__(self): message = self.args[0] if hasattr(self, 'docstring'): @@ -153,7 +157,7 @@ def __init__(self, docstring, config=None): try: self._parse() except ParseError as e: - e.docstring = orig_docstring + e.docstring = orig_docstring # type: ignore[unresolved-attribute] raise def __getitem__(self, key): diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index 53d3dd52bd8..9b833525939 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -130,7 +130,7 @@ def launch_server(dist_dir): address = q.get(timeout=50) time.sleep(0.1) # wait to make sure server is started yield address - p.terminate() + p.join() class NodeDriver: diff --git a/python/setup.py b/python/setup.py index 504c78d61bb..4e87ecfbfcc 100755 --- a/python/setup.py +++ b/python/setup.py @@ -48,7 +48,7 @@ ) -if Cython.__version__ < '3': +if Cython.__version__ < '3': # type: ignore[unresolved-attribute] raise Exception( 'Please update your Cython version. 
Supported Cython >= 3') diff --git a/python/stubs/__lib_pxi/pandas_shim.pyi b/python/stubs/__lib_pxi/pandas_shim.pyi index 0e80fae4ebf..29a8485d062 100644 --- a/python/stubs/__lib_pxi/pandas_shim.pyi +++ b/python/stubs/__lib_pxi/pandas_shim.pyi @@ -1,7 +1,7 @@ from types import ModuleType from typing import Any, Iterable, TypeGuard -import pandas as pd +import pandas from numpy import dtype from pandas.core.dtypes.base import ExtensionDtype @@ -9,8 +9,8 @@ from pandas.core.dtypes.base import ExtensionDtype class _PandasAPIShim: has_sparse: bool - def series(self, *args, **kwargs) -> pd.Series: ... - def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + def series(self, *args, **kwargs) -> pandas.Series: ... + def data_frame(self, *args, **kwargs) -> pandas.DataFrame: ... @property def have_pandas(self) -> bool: ... @property @@ -28,21 +28,21 @@ class _PandasAPIShim: def is_ge_v23(self) -> bool: ... def is_ge_v3(self) -> bool: ... @property - def categorical_type(self) -> type[pd.Categorical]: ... + def categorical_type(self) -> type[pandas.Categorical]: ... @property - def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + def datetimetz_type(self) -> type[pandas.DatetimeTZDtype]: ... @property def extension_dtype(self) -> type[ExtensionDtype]: ... def is_array_like( self, obj: Any - ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... - def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... - def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + ) -> TypeGuard[pandas.Series | pandas.Index | pandas.Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[pandas.Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[pandas.DatetimeTZDtype]: ... def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... def is_sparse(self, obj: Any) -> bool: ... - def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... - def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... - def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def is_data_frame(self, obj: Any) -> TypeGuard[pandas.DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[pandas.Series]: ... + def is_index(self, obj: Any) -> TypeGuard[pandas.Index]: ... def get_values(self, obj: Any) -> bool: ... def get_rangeindex_attribute(self, level, name): ... diff --git a/python/stubs/cffi.pyi b/python/stubs/cffi.pyi index 2ae945c5974..217b4b2ea44 100644 --- a/python/stubs/cffi.pyi +++ b/python/stubs/cffi.pyi @@ -1,4 +1,4 @@ -import cffi +from cffi import FFI c_source: str -ffi: cffi.FFI +ffi: FFI From 0585bf88056081650b191e2298df2278cc894d2d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:23:45 +0200 Subject: [PATCH 12/32] GH-19: [Python] Fix unresolved-global (#39) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. 
* Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix unresolved-global --------- Co-authored-by: Patrick J. Roddy --- python/pyarrow/tests/test_flight.py | 2 +- python/scripts/run_emscripten_tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index a3364ef05b8..5fe85ef4870 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -1120,7 +1120,7 @@ def test_client_wait_for_available(): server = None def serve(): - global server + nonlocal server time.sleep(0.5) server = FlightServerBase(location) server.serve() diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index 9b833525939..82846a65366 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -114,7 +114,7 @@ def end_headers(self): def run_server_thread(dist_dir, q): - global _SERVER_ADDRESS + global _SERVER_ADDRESS # type: ignore[unresolved-global] os.chdir(dist_dir) server = http.server.HTTPServer(("", 0), TemplateOverrider) q.put(server.server_address) From 68b9347b821bd4518215e8b7fbea33ec43b12b24 Mon Sep 17 00:00:00 2001 From: "Patrick J. Roddy" Date: Thu, 24 Jul 2025 19:29:00 +0100 Subject: [PATCH 13/32] GH-20: [Python] Fix `unsupported-reference` typing (#24) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix `unresolved-reference` error --------- Co-authored-by: Rok Mihevc --- python/stubs/__lib_pxi/types.pyi | 3 ++- python/stubs/_fs.pyi | 5 +++-- python/stubs/compute.pyi | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/stubs/__lib_pxi/types.pyi b/python/stubs/__lib_pxi/types.pyi index 7fe6c36e332..a7b6062b275 100644 --- a/python/stubs/__lib_pxi/types.pyi +++ b/python/stubs/__lib_pxi/types.pyi @@ -29,7 +29,6 @@ from .io import Buffer from .scalar import ExtensionScalar _AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) class _Weakrefable: ... class _Metadata(_Weakrefable): ... @@ -186,6 +185,8 @@ class DataType(_Weakrefable): ArrowSchema pointer. """ +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + class _BasicDataType(DataType, Generic[_AsPyType]): ... class NullType(_BasicDataType[None]): ... class BoolType(_BasicDataType[bool]): ... 
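# Editorial note (illustration only, not part of the patch above): the types.pyi
# hunk moves `_DataTypeT` below the `DataType` class because the `ty` checker
# flags module-level names that are referenced before they are defined — the
# `unresolved-reference` error this commit addresses. A minimal, self-contained
# sketch of the same ordering rule, using the hypothetical names `MyType` and
# `_MyTypeT` (not part of pyarrow's stubs):

from typing import TypeVar

class MyType: ...

# Declared after MyType so that `bound=MyType` resolves; placing this
# assignment above the class definition is what triggered the checker error
# in the stubs.
_MyTypeT = TypeVar("_MyTypeT", bound=MyType)

# A quoted forward reference, TypeVar("_MyTypeT", bound="MyType"), is another
# workaround most checkers accept; this patch series reorders the declarations
# instead, as the _fs.pyi and compute.pyi hunks below do for the same reason.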
diff --git a/python/stubs/_fs.pyi b/python/stubs/_fs.pyi index 7670ef5230d..edce54110f7 100644 --- a/python/stubs/_fs.pyi +++ b/python/stubs/_fs.pyi @@ -19,8 +19,6 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable -SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] - class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() @@ -618,6 +616,9 @@ class FileSystem(_Weakrefable): The normalized path """ + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + class LocalFileSystem(FileSystem): """ A FileSystem implementation accessing files on the local machine. diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi index f9039731ee6..1cf52ff07ca 100644 --- a/python/stubs/compute.pyi +++ b/python/stubs/compute.pyi @@ -217,9 +217,9 @@ NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar _NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] _NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) -_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) _NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] _NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) From 9811a700e5320917a91709350e40fd1a8e5ab126 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:41:21 +0200 Subject: [PATCH 14/32] GH-6: [Python] Fix invalid-argument-type error (#29) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * removing OrderedDict, etc --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/tests/parquet/test_basic.py | 4 ++-- python/pyarrow/tests/test_compute.py | 5 +++-- python/pyarrow/tests/test_pandas.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 67515c5e247..f615e39c21c 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -230,11 +230,11 @@ def test_empty_table_no_columns(): def test_write_nested_zero_length_array_chunk_failure(): # Bug report in ARROW-3792 - cols = OrderedDict( + cols = dict( int32=pa.int32(), list_string=pa.list_(pa.string()) ) - data = [[], [OrderedDict(int32=1, list_string=('G',)), ]] + data = [[], [dict(int32=1, list_string=('G',)), ]] # This produces a table with a column like # )> diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 97f694df1fc..dc35fc6619c 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2561,12 +2561,13 @@ def test_assume_timezone(): f"timezone '{timezone}'"): pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) - expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True]) + expected = ambiguous.tz_localize(timezone, ambiguous=np.array([True, True, True])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) - expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False]) + expected = ambiguous.tz_localize( + timezone, ambiguous=np.array([False, False, False])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index f0bc4a31f34..4af077ea0ef 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4963,7 +4963,7 @@ def test_does_not_mutate_timedelta_dtype(): assert np.dtype(np.timedelta64) == expected - df = pd.DataFrame({"a": [np.timedelta64()]}) + df = pd.DataFrame({"a": [np.timedelta64("s")]}) t = pa.Table.from_pandas(df) t.to_pandas() From 5b10460d1ee5a14faa74d7381d6c03698af31bb3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:46:43 +0200 Subject: [PATCH 15/32] GH-27: [Python] Fix call-non-callable error (#28) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * to_sparse appears deprecated, ignore inline * Update python/pyarrow/tests/test_feather.py --------- Co-authored-by: Patrick J. 
Roddy --- python/pyarrow/tests/test_feather.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index d2b59fddebb..9db63572cb7 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -591,7 +591,7 @@ def test_sparse_dataframe(version): # GH #221 data = {'A': [0, 1, 2], 'B': [1, 0, 1]} - df = pd.DataFrame(data).to_sparse(fill_value=1) + df = pd.DataFrame(data).to_sparse(fill_value=1) # type: ignore[call-non-callable] expected = df.to_dense() _check_pandas_roundtrip(df, expected, version=version) From 0f841ef0effc15034271e06e855675c220c07757 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 20:49:00 +0200 Subject: [PATCH 16/32] GH-10: [Python] Fix invalid-assignment error (#33) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unresolved-reference` error * Revert "Fix `unresolved-reference` error" This reverts commit 7ee3d2f04b2a01a2f1dacccc6d0995f48a3d374d. * Fix invalid-assignment --------- Co-authored-by: Patrick J. Roddy --- python/pyarrow/pandas_compat.py | 2 +- python/pyarrow/tests/interchange/test_conversion.py | 2 +- .../pyarrow/tests/interchange/test_interchange_spec.py | 3 ++- python/pyarrow/tests/parquet/common.py | 2 +- python/pyarrow/tests/parquet/test_basic.py | 4 ++-- .../tests/parquet/test_compliant_nested_type.py | 2 +- python/pyarrow/tests/parquet/test_data_types.py | 4 ++-- python/pyarrow/tests/parquet/test_dataset.py | 4 ++-- python/pyarrow/tests/parquet/test_datetime.py | 4 ++-- python/pyarrow/tests/parquet/test_metadata.py | 4 ++-- python/pyarrow/tests/parquet/test_pandas.py | 4 ++-- python/pyarrow/tests/parquet/test_parquet_file.py | 2 +- python/pyarrow/tests/parquet/test_parquet_writer.py | 2 +- python/pyarrow/tests/strategies.py | 8 ++++---- python/pyarrow/tests/test_adhoc_memory_leak.py | 2 +- python/pyarrow/tests/test_array.py | 2 +- python/pyarrow/tests/test_cffi.py | 4 ++-- python/pyarrow/tests/test_compute.py | 4 ++-- python/pyarrow/tests/test_convert_builtin.py | 2 +- python/pyarrow/tests/test_dataset.py | 4 ++-- python/pyarrow/tests/test_extension_type.py | 2 +- python/pyarrow/tests/test_feather.py | 2 +- python/pyarrow/tests/test_flight.py | 2 +- python/pyarrow/tests/test_io.py | 2 +- python/pyarrow/tests/test_ipc.py | 2 +- python/pyarrow/tests/test_json.py | 2 +- python/pyarrow/tests/test_pandas.py | 2 +- python/pyarrow/tests/test_scalars.py | 2 +- python/pyarrow/tests/test_schema.py | 2 +- python/pyarrow/tests/test_sparse_tensor.py | 10 ++++------ python/pyarrow/tests/test_table.py | 2 +- python/pyarrow/tests/test_types.py | 5 +++-- python/pyarrow/tests/test_udf.py | 2 +- 33 files changed, 51 insertions(+), 51 deletions(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 5a5e7b10f28..7b9f5008a10 100644 --- 
a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py index 50da6693aff..a584f379738 100644 --- a/python/pyarrow/tests/interchange/test_conversion.py +++ b/python/pyarrow/tests/interchange/test_conversion.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None + pass import pyarrow.interchange as pi from pyarrow.interchange.column import ( diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py index cea694d1c1e..56a424fd57a 100644 --- a/python/pyarrow/tests/interchange/test_interchange_spec.py +++ b/python/pyarrow/tests/interchange/test_interchange_spec.py @@ -20,10 +20,11 @@ import hypothesis.strategies as st import pytest +np = None try: import numpy as np except ImportError: - np = None + pass import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 4f5946649b8..7351a4c3e94 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -20,7 +20,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa from pyarrow.tests import util diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index f615e39c21c..528f8e51683 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -44,12 +44,12 @@ from pyarrow.tests.pandas_examples import dataframe_with_lists from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not parquet' diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index 2345855a332..8a64cd0cab7 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -32,7 +32,7 @@ from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index c546bc1532a..66e12d11b21 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa @@ -44,7 +44,7 @@ dataframe_with_lists) from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index b8939443c1d..a162006dc0c 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import unittest.mock as mock @@ -48,7 +48,7 @@ import pandas.testing as tm except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index b89fd97cb91..62904937eb5 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa @@ -41,7 +41,7 @@ from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 148bfebaa67..d8fafde185f 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa @@ -44,7 +44,7 @@ from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 7f647883561..9b9e7c4e48e 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa @@ -44,7 +44,7 @@ from pyarrow.tests.parquet.common import (_roundtrip_pandas_dataframe, alltypes_sample) except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git 
a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index aef0954eacd..28f25ac8482 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -38,7 +38,7 @@ from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index d1e9e874ba1..8f163dfc0b5 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -33,7 +33,7 @@ import pandas.testing as tm except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] # Marks all of the tests in this module diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 450cce74f1d..243815c59f7 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -24,15 +24,15 @@ try: import hypothesis.extra.numpy as npst except ImportError: - npst = None + npst = None # type: ignore[assignment] try: import hypothesis.extra.pytz as tzst except ImportError: - tzst = None + tzst = None # type: ignore[assignment] try: import zoneinfo except ImportError: - zoneinfo = None + zoneinfo = None # type: ignore[assignment] if sys.platform == 'win32': try: import tzdata # noqa:F401 @@ -41,7 +41,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py index 76a766984da..9f61bc7ddfe 100644 --- a/python/pyarrow/tests/test_adhoc_memory_leak.py +++ b/python/pyarrow/tests/test_adhoc_memory_leak.py @@ -20,7 +20,7 @@ try: import numpy as np except ImportError: - np = None + pass import pyarrow as pa import pyarrow.tests.util as test_util diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 9a5044ce394..a06e3f76570 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -30,7 +30,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 84290a6b880..2d0ff8b45f1 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -24,7 +24,7 @@ try: from pyarrow.cffi import ffi except ImportError: - ffi = None + ffi = None # type: ignore[assignment] import pytest @@ -32,7 +32,7 @@ import pandas as pd import pandas.testing as tm except ImportError: - pd = tm = None + pd = tm = None # type: ignore[assignment] needs_cffi = pytest.mark.skipif(ffi is None, diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index dc35fc6619c..003fb5db41d 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -31,12 +31,12 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] try: import pandas as pd except ImportError: - pd = None + pd = None # type: ignore[assignment] import pyarrow as pa import pyarrow.compute as pc diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 
07286125c4c..468bddf58cb 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] from pyarrow.pandas_compat import _pandas_api # noqa import pyarrow as pa diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index e7365643b84..d1cd3f6b8a1 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -32,7 +32,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa @@ -49,7 +49,7 @@ try: import pandas as pd except ImportError: - pd = None + pd = None # type: ignore[assignment] try: import pyarrow.dataset as ds diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index ea1c0afd7ff..1a851611b14 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa from pyarrow.vendored.version import Version diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 9db63572cb7..8235260f468 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -26,7 +26,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 5fe85ef4870..600c6492780 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index a6d3546e57c..b1ec7674f87 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] from pyarrow.util import guid from pyarrow import Codec diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index b3b3367223d..26df224ee49 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -28,7 +28,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa from pyarrow.tests.util import changed_environ, invoke_script diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index 68ac40063c9..ab0602cd198 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 4af077ea0ef..535b95515dc 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -38,7 +38,7 @@ VisibleDeprecationWarning as _np_VisibleDeprecationWarning ) except ImportError: - np = None + np = None # type: ignore[assignment] from pyarrow.pandas_compat import get_logical_type, _pandas_api from pyarrow.tests.util import 
invoke_script, random_ascii, rands diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 0f62dd98f82..f48761b1918 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.compute as pc diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index a1197ed2d08..6d1ff431819 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.tests.util as test_util diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index eca8090d77a..27974b80f80 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -26,12 +26,10 @@ import pyarrow as pa try: + import scipy from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix except ImportError: - coo_matrix = None - csr_matrix = None - csr_array = None - coo_array = None + scipy = None # type: ignore[assignment] try: import sparse @@ -401,7 +399,7 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): assert np.array_equal(array, result_array) -@pytest.mark.skipif(not coo_matrix, reason="requires scipy") +@pytest.mark.skipif(not scipy, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, @@ -443,7 +441,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, assert out_scipy_matrix.has_canonical_format -@pytest.mark.skipif(not csr_matrix, reason="requires scipy") +@pytest.mark.skipif(not scipy, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index b65fb7d952c..ead5cbaddc5 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pytest import pyarrow as pa import pyarrow.compute as pc diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index e628e559b84..338c022a223 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -27,13 +27,13 @@ try: import hypothesis.extra.pytz as tzst except ImportError: - tzst = None + tzst = None # type: ignore[assignment] import weakref try: import numpy as np except ImportError: - np = None + pass import pyarrow as pa import pyarrow.types as types import pyarrow.tests.strategies as past @@ -1322,6 +1322,7 @@ def test_field_modified_copies(): assert f0.equals(f0_) +@pytest.mark.numpy def test_is_integer_value(): assert pa.types.is_integer_value(1) if np is not None: diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py index 93004a30618..dbc30867971 100644 --- a/python/pyarrow/tests/test_udf.py +++ b/python/pyarrow/tests/test_udf.py @@ -21,7 +21,7 @@ try: import numpy as np except 
ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa from pyarrow import compute as pc From b2395974a4c1e31c55fefed98cea0ac56fc71f6d Mon Sep 17 00:00:00 2001 From: "Patrick J. Roddy" Date: Thu, 24 Jul 2025 19:50:27 +0100 Subject: [PATCH 17/32] GH-21: [Python] Fix `unsupported-operator` typing (#22) * Add py.typed file to signify that the library is typed See the relevant PEP https://peps.python.org/pep-0561 * Prepare `pyarrow-stubs` for history merging MINOR: [Python] Prepare `pyarrow-stubs` for history merging Co-authored-by: ZhengYu, Xu * Add `ty` configuration and suppress error codes * One line per rule * Add licence header from original repo for all `.pyi` files * Revert "Add licence header from original repo for all `.pyi` files" This reverts commit 1631f3916479ce9e1fd7df1194f61cb420962fd5. * Prepare for licence merging * Exclude `stubs` from `rat` test * Add Apache licence clause to `py.typed` * Reduce list * Add `ty` as a step in the action * Run in the correct directory * Remove `check` from `pip` * Fix `unsupported-operator` error --------- Co-authored-by: Rok Mihevc --- python/stubs/__lib_pxi/table.pyi | 5 +++-- python/stubs/_fs.pyi | 1 - python/stubs/_stubs_typing.pyi | 26 +++++++++++++------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/stubs/__lib_pxi/table.pyi b/python/stubs/__lib_pxi/table.pyi index ad34e9b6dff..fbcfb1ef745 100644 --- a/python/stubs/__lib_pxi/table.pyi +++ b/python/stubs/__lib_pxi/table.pyi @@ -23,6 +23,7 @@ from typing import ( Mapping, Sequence, TypeVar, + Union, overload, ) @@ -117,8 +118,8 @@ AggregateOptions: TypeAlias = ( UnarySelector: TypeAlias = str NullarySelector: TypeAlias = tuple[()] -NarySelector: TypeAlias = list[str] | tuple[str, ...] -ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector +NarySelector: TypeAlias = Union[list[str], tuple[str, ...]] +ColumnSelector: TypeAlias = Union[UnarySelector, NullarySelector, NarySelector] class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): """ diff --git a/python/stubs/_fs.pyi b/python/stubs/_fs.pyi index edce54110f7..9f6e28dcf0f 100644 --- a/python/stubs/_fs.pyi +++ b/python/stubs/_fs.pyi @@ -616,7 +616,6 @@ class FileSystem(_Weakrefable): The normalized path """ - SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] class LocalFileSystem(FileSystem): diff --git a/python/stubs/_stubs_typing.pyi b/python/stubs/_stubs_typing.pyi index c259513f1ea..40d931d24ed 100644 --- a/python/stubs/_stubs_typing.pyi +++ b/python/stubs/_stubs_typing.pyi @@ -2,7 +2,7 @@ import datetime as dt from collections.abc import Sequence from decimal import Decimal -from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar +from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar, Union import numpy as np @@ -30,12 +30,12 @@ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] Mask: TypeAlias = Sequence[bool | None] | NDArray[np.bool_] | BooleanArray Indices: TypeAlias = Sequence[int] | NDArray[np.integer[Any]] | IntegerArray -PyScalar: TypeAlias = ( - bool | int | float | Decimal | str | bytes | dt.date | dt.datetime | dt.time | dt.timedelta -) +PyScalar: TypeAlias = Union[ + bool, int, float, Decimal, str, bytes, dt.date, dt.datetime, dt.time, dt.timedelta +] _T = TypeVar("_T") -SingleOrList: TypeAlias = list[_T] | _T +SingleOrList: TypeAlias = Union[list[_T], _T] class SupportEq(Protocol): def __eq__(self, 
other) -> bool: ... @@ -52,14 +52,14 @@ class SupportLe(Protocol): class SupportGe(Protocol): def __ge__(self, other) -> bool: ... -FilterTuple: TypeAlias = ( - tuple[str, Literal["=", "==", "!="], SupportEq] - | tuple[str, Literal["<"], SupportLt] - | tuple[str, Literal[">"], SupportGt] - | tuple[str, Literal["<="], SupportLe] - | tuple[str, Literal[">="], SupportGe] - | tuple[str, Literal["in", "not in"], Collection] -) +FilterTuple: TypeAlias = Union[ + tuple[str, Literal["=", "==", "!="], SupportEq], + tuple[str, Literal["<"], SupportLt], + tuple[str, Literal[">"], SupportGt], + tuple[str, Literal["<="], SupportLe], + tuple[str, Literal[">="], SupportGe], + tuple[str, Literal["in", "not in"], Collection] +] class Buffer(Protocol): def __buffer__(self, flags: int, /) -> memoryview: ... From c7963f5c64e18181039aeeb506f6f070ee341ab0 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 21:10:57 +0200 Subject: [PATCH 18/32] Merge branch 'unresolved-import' into add-pyarrow-stubs diff --git c/python/pyarrow/__init__.py i/python/pyarrow/__init__.py index da2fe96647..45aa2b619f 100644 --- c/python/pyarrow/__init__.py +++ i/python/pyarrow/__init__.py @@ -58,8 +58,8 @@ except ImportError: except ImportError: __version__ = None -import pyarrow.lib as _lib -from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, +import pyarrow.lib as _lib # type: ignore[unresolved_import] +from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, # type: ignore[unresolved_import] MonthDayNano, VersionInfo, build_info, cpp_build_info, cpp_version, cpp_version_info, runtime_info, cpu_count, set_cpu_count, enable_signal_handlers, @@ -153,7 +153,7 @@ def show_info(): print(f" {codec: <20}: {status: <8}") -from pyarrow.lib import (null, bool_, +from pyarrow.lib import (null, bool_, # type: ignore[unresolved_import] int8, int16, int32, int64, uint8, uint16, uint32, uint64, time32, time64, timestamp, date32, date64, duration, @@ -237,13 +237,13 @@ from pyarrow.lib import (null, bool_, FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar) # Buffers, allocation -from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager, +from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager, # type: ignore[unresolved_import] default_cpu_memory_manager) -from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, +from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, # type: ignore[unresolved_import] Codec, compress, decompress, allocate_buffer) -from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool, +from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool, # type: ignore[unresolved_import] total_allocated_bytes, set_memory_pool, default_memory_pool, system_memory_pool, jemalloc_memory_pool, mimalloc_memory_pool, @@ -252,7 +252,7 @@ from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool, supported_memory_backends) # I/O -from pyarrow.lib import (NativeFile, PythonFile, +from pyarrow.lib import (NativeFile, PythonFile, # type: ignore[unresolved_import] BufferedInputStream, BufferedOutputStream, CacheOptions, CompressedInputStream, CompressedOutputStream, TransformInputStream, transcoding_input_stream, @@ -263,12 +263,12 @@ from pyarrow.lib import (NativeFile, PythonFile, input_stream, output_stream, have_libhdfs) -from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table, +from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table, # 
type: ignore[unresolved_import] concat_arrays, concat_tables, TableGroupBy, RecordBatchReader, concat_batches) # Exceptions -from pyarrow.lib import (ArrowCancelled, +from pyarrow.lib import (ArrowCancelled, # type: ignore[unresolved_import] ArrowCapacityError, ArrowException, ArrowKeyError, diff --git c/python/pyarrow/acero.py i/python/pyarrow/acero.py index e475e8db5c..dcead124d3 100644 --- c/python/pyarrow/acero.py +++ i/python/pyarrow/acero.py @@ -22,11 +22,11 @@ # distutils: language = c++ # cython: language_level = 3 -from pyarrow.lib import Table, RecordBatch, array +from pyarrow.lib import Table, RecordBatch, array # type: ignore[unresolved_import] from pyarrow.compute import Expression, field try: - from pyarrow._acero import ( # noqa + from pyarrow._acero import ( # type: ignore[unresolved_import] # noqa Declaration, ExecNodeOptions, TableSourceNodeOptions, @@ -45,7 +45,7 @@ except ImportError as exc: try: import pyarrow.dataset as ds - from pyarrow._dataset import ScanNodeOptions + from pyarrow._dataset import ScanNodeOptions # type: ignore[unresolved_import] except ImportError: class DatasetModuleStub: class Dataset: diff --git c/python/pyarrow/benchmark.py i/python/pyarrow/benchmark.py index 25ee1141f0..c0ea1b0ec8 100644 --- c/python/pyarrow/benchmark.py +++ i/python/pyarrow/benchmark.py @@ -18,4 +18,4 @@ # flake8: noqa -from pyarrow.lib import benchmark_PandasObjectIsNull +from pyarrow.lib import benchmark_PandasObjectIsNull # type: ignore[unresolved_import] diff --git c/python/pyarrow/compute.py i/python/pyarrow/compute.py index fe0afdb0a8..52e2de0e48 100644 --- c/python/pyarrow/compute.py +++ i/python/pyarrow/compute.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from pyarrow._compute import ( # noqa +from pyarrow._compute import ( # type: ignore[unresolved_import] # noqa Function, FunctionOptions, FunctionRegistry, diff --git c/python/pyarrow/conftest.py i/python/pyarrow/conftest.py index 41beaa1404..d1b1567389 100644 --- c/python/pyarrow/conftest.py +++ i/python/pyarrow/conftest.py @@ -21,7 +21,7 @@ import os import pyarrow as pa from pyarrow import Codec from pyarrow import fs -from pyarrow.lib import is_threading_enabled +from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] from pyarrow.tests.util import windows_has_tzdata import sys @@ -120,13 +120,13 @@ except ImportError: pass try: - import fastparquet # noqa + import fastparquet # type: ignore[unresolved_import] # noqa defaults['fastparquet'] = True except ImportError: pass try: - import pyarrow.gandiva # noqa + import pyarrow.gandiva # type: ignore[unresolved_import] # noqa defaults['gandiva'] = True except ImportError: pass diff --git c/python/pyarrow/csv.py i/python/pyarrow/csv.py index 1ae197f9f2..76ab1c5e03 100644 --- c/python/pyarrow/csv.py +++ i/python/pyarrow/csv.py @@ -16,7 +16,7 @@ # under the License. 
-from pyarrow._csv import ( # noqa +from pyarrow._csv import ( # type: ignore[unresolved_import] # noqa ReadOptions, ParseOptions, ConvertOptions, ISO8601, open_csv, read_csv, CSVStreamingReader, write_csv, WriteOptions, CSVWriter, InvalidRow) diff --git c/python/pyarrow/cuda.py i/python/pyarrow/cuda.py index 18c530d4af..834096cfa3 100644 --- c/python/pyarrow/cuda.py +++ i/python/pyarrow/cuda.py @@ -18,7 +18,7 @@ # flake8: noqa -from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, +from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, # type: ignore[unresolved_import] HostBuffer, BufferReader, BufferWriter, new_host_buffer, serialize_record_batch, read_message, diff --git c/python/pyarrow/dataset.py i/python/pyarrow/dataset.py index ef4f728872..1ab75f8a7f 100644 --- c/python/pyarrow/dataset.py +++ i/python/pyarrow/dataset.py @@ -21,7 +21,7 @@ import pyarrow as pa from pyarrow.util import _is_iterable, _stringify_path, _is_path_like try: - from pyarrow._dataset import ( # noqa + from pyarrow._dataset import ( # type: ignore[unresolved_import] # noqa CsvFileFormat, CsvFragmentScanOptions, JsonFileFormat, @@ -70,7 +70,7 @@ _orc_msg = ( ) try: - from pyarrow._dataset_orc import OrcFileFormat + from pyarrow._dataset_orc import OrcFileFormat # type: ignore[unresolved_import] _orc_available = True except ImportError: pass @@ -82,7 +82,7 @@ _parquet_msg = ( ) try: - from pyarrow._dataset_parquet import ( # noqa + from pyarrow._dataset_parquet import ( # type: ignore[unresolved_import] # noqa ParquetDatasetFactory, ParquetFactoryOptions, ParquetFileFormat, @@ -98,7 +98,7 @@ except ImportError: try: - from pyarrow._dataset_parquet_encryption import ( # noqa + from pyarrow._dataset_parquet_encryption import ( # type: ignore[unresolved_import] # noqa ParquetDecryptionConfig, ParquetEncryptionConfig, ) diff --git c/python/pyarrow/feather.py i/python/pyarrow/feather.py index 241c27706a..28a5c2c547 100644 --- c/python/pyarrow/feather.py +++ i/python/pyarrow/feather.py @@ -20,11 +20,12 @@ from collections.abc import Sequence import os from pyarrow.pandas_compat import _pandas_api # noqa -from pyarrow.lib import (Codec, Table, # noqa +from pyarrow.lib import (Codec, Table, # type: ignore[unresolved_import] # noqa concat_tables, schema) -import pyarrow.lib as ext -from pyarrow import _feather -from pyarrow._feather import FeatherError # noqa: F401 +import pyarrow.lib as ext # type: ignore[unresolved_import] +from pyarrow import _feather # type: ignore[unresolved_import] +from pyarrow._feather import FeatherError \ + # type: ignore[unresolved_import] # noqa: F401 class FeatherDataset: diff --git c/python/pyarrow/flight.py i/python/pyarrow/flight.py index b1836907c6..d6c4602b45 100644 --- c/python/pyarrow/flight.py +++ i/python/pyarrow/flight.py @@ -16,7 +16,7 @@ # under the License. try: - from pyarrow._flight import ( # noqa:F401 + from pyarrow._flight import ( # type: ignore[unresolved_import] # noqa:F401 connect, Action, ActionType, diff --git c/python/pyarrow/fs.py i/python/pyarrow/fs.py index 157dbdf938..c7f1b325c7 100644 --- c/python/pyarrow/fs.py +++ i/python/pyarrow/fs.py @@ -21,7 +21,7 @@ FileSystem abstraction to interact with various local and remote filesystems. 
from pyarrow.util import _is_path_like, _stringify_path -from pyarrow._fs import ( # noqa +from pyarrow._fs import ( # type: ignore[unresolved_import] # noqa FileSelector, FileType, FileInfo, @@ -40,22 +40,22 @@ FileStats = FileInfo _not_imported = [] try: - from pyarrow._azurefs import AzureFileSystem # noqa + from pyarrow._azurefs import AzureFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("AzureFileSystem") try: - from pyarrow._hdfs import HadoopFileSystem # noqa + from pyarrow._hdfs import HadoopFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("HadoopFileSystem") try: - from pyarrow._gcsfs import GcsFileSystem # noqa + from pyarrow._gcsfs import GcsFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("GcsFileSystem") try: - from pyarrow._s3fs import ( # noqa + from pyarrow._s3fs import ( # type: ignore[unresolved_import] # noqa AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized, finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region) diff --git c/python/pyarrow/ipc.py i/python/pyarrow/ipc.py index 4e23667878..39ec944b72 100644 --- c/python/pyarrow/ipc.py +++ i/python/pyarrow/ipc.py @@ -21,14 +21,14 @@ import os import pyarrow as pa -from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # noqa +from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # type: ignore[unresolved_import] # noqa Message, MessageReader, RecordBatchReader, _ReadPandasMixin, MetadataVersion, Alignment, read_message, read_record_batch, read_schema, read_tensor, write_tensor, get_record_batch_size, get_tensor_size) -import pyarrow.lib as lib +import pyarrow.lib as lib # type: ignore[unresolved_import] class RecordBatchStreamReader(lib._RecordBatchStreamReader): diff --git c/python/pyarrow/json.py i/python/pyarrow/json.py index 24e6046135..d4988a1b5a 100644 --- c/python/pyarrow/json.py +++ i/python/pyarrow/json.py @@ -16,4 +16,4 @@ # under the License. 
-from pyarrow._json import ReadOptions, ParseOptions, read_json, open_json # noqa +from pyarrow._json import ReadOptions, ParseOptions, read_json, open_json # type: ignore[unresolved_import] # noqa diff --git c/python/pyarrow/orc.py i/python/pyarrow/orc.py index 4e0d66ec66..03c6a48046 100644 --- c/python/pyarrow/orc.py +++ i/python/pyarrow/orc.py @@ -19,8 +19,8 @@ from numbers import Integral import warnings -from pyarrow.lib import Table -import pyarrow._orc as _orc +from pyarrow.lib import Table # type: ignore[unresolved_import] +import pyarrow._orc as _orc # type: ignore[unresolved_import] from pyarrow.fs import _resolve_filesystem_and_path diff --git c/python/pyarrow/pandas_compat.py i/python/pyarrow/pandas_compat.py index 7b9f5008a1..f284d411ab 100644 --- c/python/pyarrow/pandas_compat.py +++ i/python/pyarrow/pandas_compat.py @@ -35,7 +35,7 @@ try: except ImportError: np = None # type: ignore[assignment] import pyarrow as pa -from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa +from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # type: ignore[unresolved_import] # noqa _logical_type_map = {} @@ -729,7 +729,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= pandas Block """ - import pandas.core.internals as _int + import pandas.core.internals as _int # type: ignore[unresolved_import] block_arr = item.get('block', None) placement = item['placement'] @@ -806,7 +806,8 @@ def table_to_dataframe( result = pa.lib.table_to_blocks(options, table, categories, list(ext_columns_dtypes.keys())) if _pandas_api.is_ge_v3(): - from pandas.api.internals import create_dataframe_from_blocks + from pandas.api.internals import create_dataframe_from_blocks \ + # type: ignore[unresolved_import] blocks = [ _reconstruct_block( @@ -816,7 +817,8 @@ def table_to_dataframe( df = create_dataframe_from_blocks(blocks, index=index, columns=columns) return df else: - from pandas.core.internals import BlockManager + from pandas.core.internals import BlockManager \ + # type: ignore[unresolved_import] from pandas import DataFrame blocks = [ diff --git c/python/pyarrow/parquet/core.py i/python/pyarrow/parquet/core.py index 8c1a2ae782..7b6c57f968 100644 --- c/python/pyarrow/parquet/core.py +++ i/python/pyarrow/parquet/core.py @@ -29,14 +29,14 @@ import operator import pyarrow as pa try: - import pyarrow._parquet as _parquet + import pyarrow._parquet as _parquet # type: ignore[unresolved_import] except ImportError as exc: raise ImportError( "The pyarrow installation is not built with support " f"for the Parquet file format ({str(exc)})" ) from None -from pyarrow._parquet import (ParquetReader, Statistics, # noqa +from pyarrow._parquet import (ParquetReader, Statistics, # type: ignore[unresolved_import] # noqa FileMetaData, RowGroupMetaData, ColumnChunkMetaData, ParquetSchema, ColumnSchema, diff --git c/python/pyarrow/parquet/encryption.py i/python/pyarrow/parquet/encryption.py index df6eed913f..43e3bce04e 100644 --- c/python/pyarrow/parquet/encryption.py +++ i/python/pyarrow/parquet/encryption.py @@ -16,7 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from pyarrow._parquet_encryption import (CryptoFactory, # noqa +from pyarrow._parquet_encryption import (CryptoFactory, # type: ignore[unresolved_import] # noqa EncryptionConfiguration, DecryptionConfiguration, KmsConnectionConfig, diff --git c/python/pyarrow/substrait.py i/python/pyarrow/substrait.py index db2c3a96a1..7ddfa790cb 100644 --- c/python/pyarrow/substrait.py +++ i/python/pyarrow/substrait.py @@ -16,7 +16,7 @@ # under the License. try: - from pyarrow._substrait import ( # noqa + from pyarrow._substrait import ( # type: ignore[unresolved_import] # noqa BoundExpressions, get_supported_functions, run_query, diff --git c/python/pyarrow/tests/test_builder.py i/python/pyarrow/tests/test_builder.py index 9187a19b5f..65ca1458d0 100644 --- c/python/pyarrow/tests/test_builder.py +++ i/python/pyarrow/tests/test_builder.py @@ -19,7 +19,8 @@ import math import weakref import pyarrow as pa -from pyarrow.lib import StringBuilder, StringViewBuilder +from pyarrow.lib import StringBuilder, StringViewBuilder \ + # type: ignore[unresolved_import] def test_weakref(): diff --git c/python/pyarrow/tests/test_compute.py i/python/pyarrow/tests/test_compute.py index 003fb5db41..4ab0e63213 100644 --- c/python/pyarrow/tests/test_compute.py +++ i/python/pyarrow/tests/test_compute.py @@ -40,7 +40,7 @@ except ImportError: import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError +from pyarrow.lib import ArrowNotImplementedError # type: ignore[unresolved_import] try: import pyarrow.substrait as pas diff --git c/python/pyarrow/tests/test_cpp_internals.py i/python/pyarrow/tests/test_cpp_internals.py index 7508d8f0b9..359ef62b1f 100644 --- c/python/pyarrow/tests/test_cpp_internals.py +++ i/python/pyarrow/tests/test_cpp_internals.py @@ -20,7 +20,7 @@ from os.path import join as pjoin import pytest -from pyarrow._pyarrow_cpp_tests import get_cpp_tests +from pyarrow._pyarrow_cpp_tests import get_cpp_tests # type: ignore[unresolved_import] def inject_cpp_tests(ns): diff --git c/python/pyarrow/tests/test_cuda_numba_interop.py i/python/pyarrow/tests/test_cuda_numba_interop.py index 3bd81d755f..f211f0046f 100644 --- c/python/pyarrow/tests/test_cuda_numba_interop.py +++ i/python/pyarrow/tests/test_cuda_numba_interop.py @@ -26,7 +26,8 @@ dtypes = ['uint8', 'int16', 'float32'] cuda = pytest.importorskip("pyarrow.cuda") nb_cuda = pytest.importorskip("numba.cuda") -from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402 +from numba.cuda.cudadrv.devicearray import DeviceNDArray \ + # type: ignore[unresolved_import] # noqa: E402 context_choices = None diff --git c/python/pyarrow/tests/test_dataset.py i/python/pyarrow/tests/test_dataset.py index d1cd3f6b8a..d9a4d3df20 100644 --- c/python/pyarrow/tests/test_dataset.py +++ i/python/pyarrow/tests/test_dataset.py @@ -41,7 +41,7 @@ import pyarrow.csv import pyarrow.feather import pyarrow.fs as fs import pyarrow.json -from pyarrow.lib import is_threading_enabled +from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, _configure_s3_limited_user, _filesystem_uri, change_cwd) diff --git c/python/pyarrow/tests/test_flight.py i/python/pyarrow/tests/test_flight.py index 600c649278..0c0bc7089b 100644 --- c/python/pyarrow/tests/test_flight.py +++ i/python/pyarrow/tests/test_flight.py @@ -37,7 +37,7 @@ except ImportError: import pytest import pyarrow as pa -from pyarrow.lib import IpcReadOptions, tobytes +from pyarrow.lib import IpcReadOptions, tobytes 
# type: ignore[unresolved_import] from pyarrow.util import find_free_port from pyarrow.tests import util diff --git c/python/pyarrow/tests/test_fs.py i/python/pyarrow/tests/test_fs.py index a5a10fa55c..61dcb76b24 100644 --- c/python/pyarrow/tests/test_fs.py +++ i/python/pyarrow/tests/test_fs.py @@ -2168,7 +2168,7 @@ def test_fsspec_filesystem_from_uri(): def test_huggingface_filesystem_from_uri(): pytest.importorskip("fsspec") try: - from huggingface_hub import HfFileSystem + from huggingface_hub import HfFileSystem # type: ignore[unresolved_import] except ImportError: pytest.skip("huggingface_hub not installed") diff --git c/python/pyarrow/tests/test_gandiva.py i/python/pyarrow/tests/test_gandiva.py index 80d119a485..01a6d2151a 100644 --- c/python/pyarrow/tests/test_gandiva.py +++ i/python/pyarrow/tests/test_gandiva.py @@ -23,7 +23,7 @@ import pyarrow as pa @pytest.mark.gandiva def test_tree_exp_builder(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() @@ -65,7 +65,7 @@ def test_tree_exp_builder(): @pytest.mark.gandiva def test_table(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([pa.array([1.0, 2.0]), pa.array([3.0, 4.0])], ['a', 'b']) @@ -92,7 +92,7 @@ def test_table(): @pytest.mark.gandiva def test_filter(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([pa.array([1.0 * i for i in range(10000)])], ['a']) @@ -116,7 +116,7 @@ def test_filter(): @pytest.mark.gandiva def test_in_expr(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] arr = pa.array(["ga", "an", "nd", "di", "iv", "va"]) table = pa.Table.from_arrays([arr], ["a"]) @@ -154,7 +154,7 @@ def test_in_expr(): @pytest.mark.skip(reason="Gandiva C++ did not have *real* binary, " "time and date support.") def test_in_expr_todo(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] # TODO: Implement reasonable support for timestamp, time & date. 
# Current exceptions: # pyarrow.lib.ArrowException: ExpressionValidationError: @@ -227,7 +227,7 @@ def test_in_expr_todo(): @pytest.mark.gandiva def test_boolean(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([ pa.array([1., 31., 46., 3., 57., 44., 22.]), @@ -254,7 +254,7 @@ def test_boolean(): @pytest.mark.gandiva def test_literals(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() @@ -294,7 +294,7 @@ def test_literals(): @pytest.mark.gandiva def test_regex(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] elements = ["park", "sparkle", "bright spark and fire", "spark"] data = pa.array(elements, type=pa.string()) @@ -318,7 +318,7 @@ def test_regex(): @pytest.mark.gandiva def test_get_registered_function_signatures(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] signatures = gandiva.get_registered_function_signatures() assert type(signatures[0].return_type()) is pa.DataType @@ -328,7 +328,7 @@ def test_get_registered_function_signatures(): @pytest.mark.gandiva def test_filter_project(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] mpool = pa.default_memory_pool() # Create a table with some sample data array0 = pa.array([10, 12, -20, 5, 21, 29], pa.int32()) @@ -375,7 +375,7 @@ def test_filter_project(): @pytest.mark.gandiva def test_to_string(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() assert str(builder.make_literal(2.0, pa.float64()) @@ -395,7 +395,7 @@ def test_to_string(): @pytest.mark.gandiva def test_rejects_none(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() diff --git c/python/pyarrow/tests/test_jvm.py i/python/pyarrow/tests/test_jvm.py index d71380b866..b048fcea9e 100644 --- c/python/pyarrow/tests/test_jvm.py +++ i/python/pyarrow/tests/test_jvm.py @@ -76,8 +76,8 @@ def test_jvm_buffer(root_allocator): def test_jvm_buffer_released(root_allocator): - import jpype.imports # noqa - from java.lang import IllegalArgumentException + import jpype.imports # type: ignore[unresolved_import] # noqa + from java.lang import IllegalArgumentException # type: ignore[unresolved_import] jvm_buffer = root_allocator.buffer(8) jvm_buffer.release() diff --git c/python/pyarrow/tests/test_misc.py i/python/pyarrow/tests/test_misc.py index 64f45d8bed..09ac52588e 100644 --- c/python/pyarrow/tests/test_misc.py +++ i/python/pyarrow/tests/test_misc.py @@ -22,7 +22,7 @@ import sys import pytest import pyarrow as pa -from pyarrow.lib import ArrowInvalid +from pyarrow.lib import ArrowInvalid # type: ignore[unresolved_import] def test_get_include(): diff --git c/python/pyarrow/tests/test_sparse_tensor.py i/python/pyarrow/tests/test_sparse_tensor.py index 27974b80f8..e4d141e2a6 100644 --- c/python/pyarrow/tests/test_sparse_tensor.py +++ i/python/pyarrow/tests/test_sparse_tensor.py @@ -32,7 +32,7 @@ except ImportError: scipy = None # type: ignore[assignment] try: - import sparse + import sparse # type: ignore[unresolved_import] except ImportError: sparse = None diff --git c/python/pyarrow/tests/test_substrait.py i/python/pyarrow/tests/test_substrait.py index 
fcd1c8d48c..fae89d3cee 100644 --- c/python/pyarrow/tests/test_substrait.py +++ i/python/pyarrow/tests/test_substrait.py @@ -22,8 +22,9 @@ import pytest import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import tobytes -from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError +from pyarrow.lib import tobytes # type: ignore[unresolved_import] +from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError \ + # type: ignore[unresolved_import] try: import pyarrow.substrait as substrait @@ -36,7 +37,7 @@ pytestmark = pytest.mark.substrait def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context + from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] return _get_udf_context(pa.default_memory_pool(), batch_length) diff --git c/python/pyarrow/tests/test_udf.py i/python/pyarrow/tests/test_udf.py index dbc3086797..891295a551 100644 --- c/python/pyarrow/tests/test_udf.py +++ i/python/pyarrow/tests/test_udf.py @@ -39,7 +39,7 @@ except ImportError: def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context + from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] return _get_udf_context(pa.default_memory_pool(), batch_length) diff --git c/python/pyarrow/types.py i/python/pyarrow/types.py index ab4e5d1b99..ee2b7e1440 100644 --- c/python/pyarrow/types.py +++ i/python/pyarrow/types.py @@ -20,11 +20,11 @@ from enum import IntEnum -from pyarrow.lib import (is_boolean_value, # noqa +from pyarrow.lib import (is_boolean_value, # type: ignore[unresolved_import] # noqa is_integer_value, is_float_value) -import pyarrow.lib as lib +import pyarrow.lib as lib # type: ignore[unresolved_import] from pyarrow.util import doc diff --git c/python/stubs/__init__.pyi i/python/stubs/__init__.pyi index 8a0d1e870c..0a1c49067c 100644 --- c/python/stubs/__init__.pyi +++ i/python/stubs/__init__.pyi @@ -1,11 +1,11 @@ # ruff: noqa: F401, I001, E402 __version__: str -import pyarrow.lib as _lib +import pyarrow.lib as _lib # type: ignore[unresolved_import] _gc_enabled: bool -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] BuildInfo, RuntimeInfo, set_timezone_db_path, @@ -27,7 +27,7 @@ def show_info() -> None: ... def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... 
-from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] null, bool_, int8, @@ -233,9 +233,9 @@ from pyarrow.lib import ( ) # Buffers, allocation -from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager # type: ignore[unresolved_import] -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] Buffer, ResizableBuffer, foreign_buffer, @@ -246,7 +246,7 @@ from pyarrow.lib import ( allocate_buffer, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] MemoryPool, LoggingMemoryPool, ProxyMemoryPool, @@ -264,7 +264,7 @@ from pyarrow.lib import ( ) # I/O -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] NativeFile, PythonFile, BufferedInputStream, @@ -287,7 +287,7 @@ from pyarrow.lib import ( have_libhdfs, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] ChunkedArray, RecordBatch, Table, @@ -299,7 +299,7 @@ from pyarrow.lib import ( ) # Exceptions -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] ArrowCancelled, ArrowCapacityError, ArrowException, diff --git c/python/stubs/__lib_pxi/array.pyi i/python/stubs/__lib_pxi/array.pyi index ffdb8a9c07..37b397f6bb 100644 --- c/python/stubs/__lib_pxi/array.pyi +++ i/python/stubs/__lib_pxi/array.pyi @@ -23,8 +23,8 @@ import numpy as np import pandas as pd from pandas.core.dtypes.base import ExtensionDtype -from pyarrow._compute import CastOptions -from pyarrow._stubs_typing import ( +from pyarrow._compute import CastOptions # type: ignore[unresolved_import] +from pyarrow._stubs_typing import ( # type: ignore[unresolved_import] ArrayLike, Indices, Mask, @@ -32,7 +32,7 @@ from pyarrow._stubs_typing import ( SupportArrowArray, SupportArrowDeviceArray, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] Buffer, Device, MemoryManager, diff --git c/python/stubs/__lib_pxi/builder.pyi i/python/stubs/__lib_pxi/builder.pyi index 4a0e9ca470..655d6436da 100644 --- c/python/stubs/__lib_pxi/builder.pyi +++ i/python/stubs/__lib_pxi/builder.pyi @@ -1,6 +1,6 @@ from typing import Iterable -from pyarrow.lib import MemoryPool, _Weakrefable +from pyarrow.lib import MemoryPool, _Weakrefable # type: ignore[unresolved_import] from .array import StringArray, StringViewArray diff --git c/python/stubs/__lib_pxi/device.pyi i/python/stubs/__lib_pxi/device.pyi index d1b9f39eed..edcabdd796 100644 --- c/python/stubs/__lib_pxi/device.pyi +++ i/python/stubs/__lib_pxi/device.pyi @@ -1,6 +1,6 @@ import enum -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] class DeviceAllocationType(enum.Flag): CPU = enum.auto() diff --git c/python/stubs/__lib_pxi/io.pyi i/python/stubs/__lib_pxi/io.pyi index 37c8aefb06..488dbf163a 100644 --- c/python/stubs/__lib_pxi/io.pyi +++ i/python/stubs/__lib_pxi/io.pyi @@ -17,8 +17,8 @@ else: from typing import Any, Literal, SupportsIndex, overload -from pyarrow._stubs_typing import Compression, SupportPyBuffer -from pyarrow.lib import MemoryPool, _Weakrefable +from pyarrow._stubs_typing import Compression, SupportPyBuffer # type: ignore[unresolved_import] +from pyarrow.lib import MemoryPool, _Weakrefable # type: ignore[unresolved_import] from .device import Device, DeviceAllocationType, MemoryManager from .types import 
KeyValueMetadata diff --git c/python/stubs/__lib_pxi/ipc.pyi i/python/stubs/__lib_pxi/ipc.pyi index 3d72892061..13363e4447 100644 --- c/python/stubs/__lib_pxi/ipc.pyi +++ i/python/stubs/__lib_pxi/ipc.pyi @@ -11,8 +11,8 @@ from typing import Iterable, Iterator, Literal, Mapping, NamedTuple import pandas as pd -from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer -from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable +from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer # type: ignore[unresolved_import] +from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable # type: ignore[unresolved_import] from .io import Buffer, Codec, NativeFile from .types import DictionaryMemo, KeyValueMetadata diff --git c/python/stubs/__lib_pxi/memory.pyi i/python/stubs/__lib_pxi/memory.pyi index 57a3bb4f1b..c58bf20dd9 100644 --- c/python/stubs/__lib_pxi/memory.pyi +++ i/python/stubs/__lib_pxi/memory.pyi @@ -1,4 +1,4 @@ -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] class MemoryPool(_Weakrefable): """ diff --git c/python/stubs/__lib_pxi/pandas_shim.pyi i/python/stubs/__lib_pxi/pandas_shim.pyi index 29a8485d06..c8cebf765a 100644 --- c/python/stubs/__lib_pxi/pandas_shim.pyi +++ i/python/stubs/__lib_pxi/pandas_shim.pyi @@ -1,5 +1,5 @@ from types import ModuleType -from typing import Any, Iterable, TypeGuard +from typing import Any, Iterable, TypeGuard # type: ignore[unresolved_import] import pandas diff --git c/python/stubs/__lib_pxi/scalar.pyi i/python/stubs/__lib_pxi/scalar.pyi index 81ab501206..cfd4ee6f34 100644 --- c/python/stubs/__lib_pxi/scalar.pyi +++ i/python/stubs/__lib_pxi/scalar.pyi @@ -16,8 +16,8 @@ from typing import Any, Generic, Iterator, Literal, Mapping, overload import numpy as np -from pyarrow._compute import CastOptions -from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable +from pyarrow._compute import CastOptions # type: ignore[unresolved_import] +from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable # type: ignore[unresolved_import] from typing_extensions import Protocol, TypeVar from . import types diff --git c/python/stubs/__lib_pxi/table.pyi i/python/stubs/__lib_pxi/table.pyi index fbcfb1ef74..1ce21b6ed2 100644 --- c/python/stubs/__lib_pxi/table.pyi +++ i/python/stubs/__lib_pxi/table.pyi @@ -31,7 +31,7 @@ import numpy as np import pandas as pd from numpy.typing import NDArray -from pyarrow._compute import ( +from pyarrow._compute import ( # type: ignore[unresolved_import] CastOptions, CountOptions, FunctionOptions, @@ -39,7 +39,7 @@ from pyarrow._compute import ( TDigestOptions, VarianceOptions, ) -from pyarrow._stubs_typing import ( +from pyarrow._stubs_typing import ( # type: ignore[unresolved_import] Indices, Mask, NullEncoding, @@ -49,12 +49,15 @@ from pyarrow._stubs_typing import ( SupportArrowDeviceArray, SupportArrowStream, ) -from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.compute import ArrayOrChunkedArray, Expression # type: ignore[unresolved_import] from pyarrow.interchange.dataframe import _PyArrowDataFrame -from pyarrow.lib import Device, Field, MemoryManager, MemoryPool, MonthDayNano, Schema +from pyarrow.lib import Device, Field, MemoryManager, MemoryPool, MonthDayNano, Schema # type: ignore[unresolved_import] from . 
import array, scalar, types -from .array import Array, NullableCollection, StructArray, _CastAs, _PandasConvertible +from .array import ( + Array, StructArray, _CastAs, _PandasConvertible, + NullableCollection, # type: ignore[unresolved_import] +) from .device import DeviceAllocationType from .io import Buffer from .ipc import RecordBatchReader diff --git c/python/stubs/__lib_pxi/tensor.pyi i/python/stubs/__lib_pxi/tensor.pyi index d849abd0f1..a28804c6e3 100644 --- c/python/stubs/__lib_pxi/tensor.pyi +++ i/python/stubs/__lib_pxi/tensor.pyi @@ -7,9 +7,9 @@ else: import numpy as np -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] from scipy.sparse import coo_matrix, csr_matrix -from sparse import COO +from sparse import COO # type: ignore[unresolved_import] class Tensor(_Weakrefable): """ diff --git c/python/stubs/__lib_pxi/types.pyi i/python/stubs/__lib_pxi/types.pyi index a7b6062b27..d38269ef34 100644 --- c/python/stubs/__lib_pxi/types.pyi +++ i/python/stubs/__lib_pxi/types.pyi @@ -14,8 +14,8 @@ from typing import Any, Generic, Iterable, Iterator, Literal, overload import numpy as np import pandas as pd -from pyarrow._stubs_typing import SupportArrowSchema -from pyarrow.lib import ( +from pyarrow._stubs_typing import SupportArrowSchema # type: ignore[unresolved_import] +from pyarrow.lib import ( # type: ignore[unresolved_import] Array, ChunkedArray, ExtensionArray, @@ -29,6 +29,7 @@ from .io import Buffer from .scalar import ExtensionScalar _AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) class _Weakrefable: ... class _Metadata(_Weakrefable): ... diff --git c/python/stubs/_compute.pyi i/python/stubs/_compute.pyi index 3d61ae4278..071fceb392 100644 --- c/python/stubs/_compute.pyi +++ i/python/stubs/_compute.pyi @@ -1,12 +1,6 @@ from typing import ( - Any, - Callable, - Iterable, - Literal, - Sequence, - TypeAlias, - TypedDict, - overload, + Any, Callable, Iterable, Literal, Sequence, TypedDict, overload, + TypeAlias, # type: ignore[unresolved_import] ) from . 
import lib diff --git c/python/stubs/_fs.pyi i/python/stubs/_fs.pyi index 9f6e28dcf0..d3b194e3de 100644 --- c/python/stubs/_fs.pyi +++ i/python/stubs/_fs.pyi @@ -19,6 +19,8 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() diff --git c/python/stubs/_parquet.pyi i/python/stubs/_parquet.pyi index a9187df042..053f2d0826 100644 --- c/python/stubs/_parquet.pyi +++ i/python/stubs/_parquet.pyi @@ -1,4 +1,7 @@ -from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict +from typing import ( + IO, Any, Iterable, Iterator, Literal, Sequence, TypedDict, + TypeAlias, # type: ignore[unresolved_import] +) from _typeshed import StrPath diff --git c/python/stubs/_s3fs.pyi i/python/stubs/_s3fs.pyi index fc13c498bd..8e67c80561 100644 --- c/python/stubs/_s3fs.pyi +++ i/python/stubs/_s3fs.pyi @@ -1,6 +1,9 @@ import enum -from typing import Literal, NotRequired, Required, TypedDict +from typing import ( + Literal, TypedDict, + NotRequired, Required, # type: ignore[unresolved_import] +) from ._fs import FileSystem from .lib import KeyValueMetadata diff --git c/python/stubs/_stubs_typing.pyi i/python/stubs/_stubs_typing.pyi index 40d931d24e..73bb9f38a9 100644 --- c/python/stubs/_stubs_typing.pyi +++ i/python/stubs/_stubs_typing.pyi @@ -2,7 +2,10 @@ import datetime as dt from collections.abc import Sequence from decimal import Decimal -from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar, Union +from typing import ( + Any, Collection, Literal, Protocol, TypeVar, Union, + TypeAlias # type: ignore[unresolved_import] +) import numpy as np diff --git c/python/stubs/benchmark.pyi i/python/stubs/benchmark.pyi index 048973301d..972fad10a5 100644 --- c/python/stubs/benchmark.pyi +++ i/python/stubs/benchmark.pyi @@ -1,3 +1,3 @@ -from pyarrow.lib import benchmark_PandasObjectIsNull +from pyarrow.lib import benchmark_PandasObjectIsNull # type: ignore[unresolved_import] __all__ = ["benchmark_PandasObjectIsNull"] diff --git c/python/stubs/compute.pyi i/python/stubs/compute.pyi index 1cf52ff07c..775b7fa504 100644 --- c/python/stubs/compute.pyi +++ i/python/stubs/compute.pyi @@ -1,94 +1,100 @@ # ruff: noqa: I001 -from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence # type: ignore[unresolved_import] from collections.abc import Callable # Option classes -from pyarrow._compute import ArraySortOptions as ArraySortOptions -from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions -from pyarrow._compute import CastOptions as CastOptions -from pyarrow._compute import CountOptions as CountOptions -from pyarrow._compute import CumulativeOptions as CumulativeOptions -from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions -from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions -from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions -from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions +from pyarrow._compute import ( # type: ignore[unresolved_import] + ArraySortOptions as ArraySortOptions, + AssumeTimezoneOptions as AssumeTimezoneOptions, + CastOptions as CastOptions, + CountOptions as CountOptions, + CumulativeOptions as CumulativeOptions, + 
CumulativeSumOptions as CumulativeSumOptions, + DayOfWeekOptions as DayOfWeekOptions, + DictionaryEncodeOptions as DictionaryEncodeOptions, + ElementWiseAggregateOptions as ElementWiseAggregateOptions, +) # Expressions -from pyarrow._compute import Expression as Expression -from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions -from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions -from pyarrow._compute import FilterOptions as FilterOptions -from pyarrow._compute import Function as Function -from pyarrow._compute import FunctionOptions as FunctionOptions -from pyarrow._compute import FunctionRegistry as FunctionRegistry -from pyarrow._compute import HashAggregateFunction as HashAggregateFunction -from pyarrow._compute import HashAggregateKernel as HashAggregateKernel -from pyarrow._compute import IndexOptions as IndexOptions -from pyarrow._compute import JoinOptions as JoinOptions -from pyarrow._compute import Kernel as Kernel -from pyarrow._compute import ListFlattenOptions as ListFlattenOptions -from pyarrow._compute import ListSliceOptions as ListSliceOptions -from pyarrow._compute import MakeStructOptions as MakeStructOptions -from pyarrow._compute import MapLookupOptions as MapLookupOptions -from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions -from pyarrow._compute import ModeOptions as ModeOptions -from pyarrow._compute import NullOptions as NullOptions -from pyarrow._compute import PadOptions as PadOptions -from pyarrow._compute import PairwiseOptions as PairwiseOptions -from pyarrow._compute import PartitionNthOptions as PartitionNthOptions -from pyarrow._compute import PivotWiderOptions as PivotWiderOptions -from pyarrow._compute import QuantileOptions as QuantileOptions -from pyarrow._compute import RandomOptions as RandomOptions -from pyarrow._compute import RankOptions as RankOptions -from pyarrow._compute import RankQuantileOptions as RankQuantileOptions -from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions -from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions -from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions -from pyarrow._compute import RoundOptions as RoundOptions -from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions -from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions -from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions -from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction -from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel -from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions -from pyarrow._compute import ScalarFunction as ScalarFunction -from pyarrow._compute import ScalarKernel as ScalarKernel -from pyarrow._compute import SelectKOptions as SelectKOptions -from pyarrow._compute import SetLookupOptions as SetLookupOptions -from pyarrow._compute import SkewOptions as SkewOptions -from pyarrow._compute import SliceOptions as SliceOptions -from pyarrow._compute import SortOptions as SortOptions -from pyarrow._compute import SplitOptions as SplitOptions -from pyarrow._compute import SplitPatternOptions as SplitPatternOptions -from pyarrow._compute import StrftimeOptions as StrftimeOptions -from pyarrow._compute import StrptimeOptions as StrptimeOptions -from pyarrow._compute import StructFieldOptions as StructFieldOptions -from pyarrow._compute import TakeOptions as TakeOptions -from pyarrow._compute 
import TDigestOptions as TDigestOptions -from pyarrow._compute import TrimOptions as TrimOptions -from pyarrow._compute import UdfContext as UdfContext -from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions -from pyarrow._compute import VarianceOptions as VarianceOptions -from pyarrow._compute import VectorFunction as VectorFunction -from pyarrow._compute import VectorKernel as VectorKernel -from pyarrow._compute import WeekOptions as WeekOptions -from pyarrow._compute import WinsorizeOptions as WinsorizeOptions +from pyarrow._compute import ( # type: ignore[unresolved_import] + Expression as Expression, + ExtractRegexOptions as ExtractRegexOptions, + ExtractRegexSpanOptions as ExtractRegexSpanOptions, + FilterOptions as FilterOptions, + Function as Function, + FunctionOptions as FunctionOptions, + FunctionRegistry as FunctionRegistry, + HashAggregateFunction as HashAggregateFunction, + HashAggregateKernel as HashAggregateKernel, + IndexOptions as IndexOptions, + JoinOptions as JoinOptions, + Kernel as Kernel, + ListFlattenOptions as ListFlattenOptions, + ListSliceOptions as ListSliceOptions, + MakeStructOptions as MakeStructOptions, + MapLookupOptions as MapLookupOptions, + MatchSubstringOptions as MatchSubstringOptions, + ModeOptions as ModeOptions, + NullOptions as NullOptions, + PadOptions as PadOptions, + PairwiseOptions as PairwiseOptions, + PartitionNthOptions as PartitionNthOptions, + PivotWiderOptions as PivotWiderOptions, + QuantileOptions as QuantileOptions, + RandomOptions as RandomOptions, + RankOptions as RankOptions, + RankQuantileOptions as RankQuantileOptions, + ReplaceSliceOptions as ReplaceSliceOptions, + ReplaceSubstringOptions as ReplaceSubstringOptions, + RoundBinaryOptions as RoundBinaryOptions, + RoundOptions as RoundOptions, + RoundTemporalOptions as RoundTemporalOptions, + RoundToMultipleOptions as RoundToMultipleOptions, + RunEndEncodeOptions as RunEndEncodeOptions, + ScalarAggregateFunction as ScalarAggregateFunction, + ScalarAggregateKernel as ScalarAggregateKernel, + ScalarAggregateOptions as ScalarAggregateOptions, + ScalarFunction as ScalarFunction, + ScalarKernel as ScalarKernel, + SelectKOptions as SelectKOptions, + SetLookupOptions as SetLookupOptions, + SkewOptions as SkewOptions, + SliceOptions as SliceOptions, + SortOptions as SortOptions, + SplitOptions as SplitOptions, + SplitPatternOptions as SplitPatternOptions, + StrftimeOptions as StrftimeOptions, + StrptimeOptions as StrptimeOptions, + StructFieldOptions as StructFieldOptions, + TakeOptions as TakeOptions, + TDigestOptions as TDigestOptions, + TrimOptions as TrimOptions, + UdfContext as UdfContext, + Utf8NormalizeOptions as Utf8NormalizeOptions, + VarianceOptions as VarianceOptions, + VectorFunction as VectorFunction, + VectorKernel as VectorKernel, + WeekOptions as WeekOptions, + WinsorizeOptions as WinsorizeOptions, +) # Functions -from pyarrow._compute import call_function as call_function +from pyarrow._compute import call_function as call_function # type: ignore[unresolved_import] # Udf -from pyarrow._compute import call_tabular_function as call_tabular_function -from pyarrow._compute import function_registry as function_registry -from pyarrow._compute import get_function as get_function -from pyarrow._compute import list_functions as list_functions -from pyarrow._compute import register_aggregate_function as register_aggregate_function -from pyarrow._compute import register_scalar_function as register_scalar_function -from pyarrow._compute import 
register_tabular_function as register_tabular_function -from pyarrow._compute import register_vector_function as register_vector_function +from pyarrow._compute import ( # type: ignore[unresolved_import] + call_tabular_function as call_tabular_function, + function_registry as function_registry, + get_function as get_function, + list_functions as list_functions, + register_aggregate_function as register_aggregate_function, + register_scalar_function as register_scalar_function, + register_tabular_function as register_tabular_function, + register_vector_function as register_vector_function, +) -from pyarrow._compute import _Order, _Placement -from pyarrow._stubs_typing import ArrayLike, ScalarLike +from pyarrow._compute import _Order, _Placement # type: ignore[unresolved_import] +from pyarrow._stubs_typing import ArrayLike, ScalarLike # type: ignore[unresolved_import] from . import lib _P = ParamSpec("_P") diff --git c/python/stubs/csv.pyi i/python/stubs/csv.pyi index 510229d7e7..cea5542d1c 100644 --- c/python/stubs/csv.pyi +++ i/python/stubs/csv.pyi @@ -1,4 +1,4 @@ -from pyarrow._csv import ( +from pyarrow._csv import ( # type: ignore[unresolved_import] ISO8601, ConvertOptions, CSVStreamingReader, diff --git c/python/stubs/cuda.pyi i/python/stubs/cuda.pyi index e11baf7d4e..3c69e746f7 100644 --- c/python/stubs/cuda.pyi +++ i/python/stubs/cuda.pyi @@ -1,4 +1,4 @@ -from pyarrow._cuda import ( +from pyarrow._cuda import ( # type: ignore[unresolved_import] BufferReader, BufferWriter, Context, diff --git c/python/stubs/dataset.pyi i/python/stubs/dataset.pyi index 98f1a38aa8..a57e9f2f3f 100644 --- c/python/stubs/dataset.pyi +++ i/python/stubs/dataset.pyi @@ -1,7 +1,7 @@ -from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload +from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload # type: ignore[unresolved_import] from _typeshed import StrPath -from pyarrow._dataset import ( +from pyarrow._dataset import ( # type: ignore[unresolved_import] CsvFileFormat, CsvFragmentScanOptions, Dataset, @@ -32,8 +32,8 @@ from pyarrow._dataset import ( WrittenFile, get_partition_keys, ) -from pyarrow._dataset_orc import OrcFileFormat -from pyarrow._dataset_parquet import ( +from pyarrow._dataset_orc import OrcFileFormat # type: ignore[unresolved_import] +from pyarrow._dataset_parquet import ( # type: ignore[unresolved_import] ParquetDatasetFactory, ParquetFactoryOptions, ParquetFileFormat, @@ -43,12 +43,12 @@ from pyarrow._dataset_parquet import ( ParquetReadOptions, RowGroupInfo, ) -from pyarrow._dataset_parquet_encryption import ( +from pyarrow._dataset_parquet_encryption import ( # type: ignore[unresolved_import] ParquetDecryptionConfig, ParquetEncryptionConfig, ) from pyarrow.compute import Expression, field, scalar -from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table +from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table # type: ignore[unresolved_import] from ._fs import SupportedFileSystem diff --git c/python/stubs/feather.pyi i/python/stubs/feather.pyi index 9451ee1576..63766cd5d6 100644 --- c/python/stubs/feather.pyi +++ i/python/stubs/feather.pyi @@ -3,8 +3,8 @@ from typing import IO, Literal import pandas as pd from _typeshed import StrPath -from pyarrow._feather import FeatherError -from pyarrow.lib import Table +from pyarrow._feather import FeatherError # type: ignore[unresolved_import] +from pyarrow.lib import Table # type: ignore[unresolved_import] __all__ = [ "FeatherError", diff --git 
c/python/stubs/flight.pyi i/python/stubs/flight.pyi index 9b806ccf30..aa06f3ebec 100644 --- c/python/stubs/flight.pyi +++ i/python/stubs/flight.pyi @@ -1,4 +1,4 @@ -from pyarrow._flight import ( +from pyarrow._flight import ( # type: ignore[unresolved_import] Action, ActionType, BasicAuth, diff --git c/python/stubs/fs.pyi i/python/stubs/fs.pyi index 6bf75616c1..07a1d7765e 100644 --- c/python/stubs/fs.pyi +++ i/python/stubs/fs.pyi @@ -1,4 +1,4 @@ -from pyarrow._fs import ( # noqa +from pyarrow._fs import ( # type: ignore[unresolved_import] # noqa FileSelector, FileType, FileInfo, @@ -10,10 +10,10 @@ from pyarrow._fs import ( # noqa PyFileSystem, SupportedFileSystem, ) -from pyarrow._azurefs import AzureFileSystem -from pyarrow._hdfs import HadoopFileSystem -from pyarrow._gcsfs import GcsFileSystem -from pyarrow._s3fs import ( # noqa +from pyarrow._azurefs import AzureFileSystem # type: ignore[unresolved_import] +from pyarrow._hdfs import HadoopFileSystem # type: ignore[unresolved_import] +from pyarrow._gcsfs import GcsFileSystem # type: ignore[unresolved_import] +from pyarrow._s3fs import ( # type: ignore[unresolved_import] # noqa AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, S3FileSystem, diff --git c/python/stubs/interchange/buffer.pyi i/python/stubs/interchange/buffer.pyi index 46673961a7..afef5acf35 100644 --- c/python/stubs/interchange/buffer.pyi +++ i/python/stubs/interchange/buffer.pyi @@ -1,6 +1,6 @@ import enum -from pyarrow.lib import Buffer +from pyarrow.lib import Buffer # type: ignore[unresolved_import] class DlpackDeviceType(enum.IntEnum): """Integer enum for device type codes matching DLPack.""" diff --git c/python/stubs/interchange/column.pyi i/python/stubs/interchange/column.pyi index e6662867b6..7d89c4ae6b 100644 --- c/python/stubs/interchange/column.pyi +++ i/python/stubs/interchange/column.pyi @@ -1,8 +1,8 @@ import enum -from typing import Any, Iterable, TypeAlias, TypedDict +from typing import Any, Iterable, TypeAlias, TypedDict # type: ignore[unresolved_import] -from pyarrow.lib import Array, ChunkedArray +from pyarrow.lib import Array, ChunkedArray # type: ignore[unresolved_import] from .buffer import _PyArrowBuffer diff --git c/python/stubs/interchange/dataframe.pyi i/python/stubs/interchange/dataframe.pyi index 526a58926a..7a17dfeb1e 100644 --- c/python/stubs/interchange/dataframe.pyi +++ i/python/stubs/interchange/dataframe.pyi @@ -7,7 +7,7 @@ else: from typing import Any, Iterable, Sequence from pyarrow.interchange.column import _PyArrowColumn -from pyarrow.lib import RecordBatch, Table +from pyarrow.lib import RecordBatch, Table # type: ignore[unresolved_import] class _PyArrowDataFrame: """ diff --git c/python/stubs/interchange/from_dataframe.pyi i/python/stubs/interchange/from_dataframe.pyi index b04b626897..ad461270f5 100644 --- c/python/stubs/interchange/from_dataframe.pyi +++ i/python/stubs/interchange/from_dataframe.pyi @@ -1,6 +1,6 @@ -from typing import Any, Protocol, TypeAlias +from typing import Any, Protocol, TypeAlias # type: ignore[unresolved_import] -from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table # type: ignore[unresolved_import] from .column import ( ColumnBuffers, diff --git c/python/stubs/ipc.pyi i/python/stubs/ipc.pyi index c7f2af004d..2a5e8294e4 100644 --- c/python/stubs/ipc.pyi +++ i/python/stubs/ipc.pyi @@ -1,9 +1,9 @@ from io import IOBase import pandas as pd -import pyarrow.lib as lib +import pyarrow.lib as 
lib # type: ignore[unresolved_import] -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] IpcReadOptions, IpcWriteOptions, Message, diff --git c/python/stubs/json.pyi i/python/stubs/json.pyi index db1d35e0b8..97b94d5dd7 100644 --- c/python/stubs/json.pyi +++ i/python/stubs/json.pyi @@ -1,3 +1,3 @@ -from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json # type: ignore[unresolved_import] __all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git c/python/stubs/parquet/core.pyi i/python/stubs/parquet/core.pyi index 56b2c8447d..01dce442fe 100644 --- c/python/stubs/parquet/core.pyi +++ i/python/stubs/parquet/core.pyi @@ -13,10 +13,10 @@ if sys.version_info >= (3, 10): else: from typing_extensions import TypeAlias -from pyarrow import _parquet -from pyarrow._compute import Expression -from pyarrow._fs import FileSystem, SupportedFileSystem -from pyarrow._parquet import ( +from pyarrow import _parquet # type: ignore[unresolved_import] +from pyarrow._compute import Expression # type: ignore[unresolved_import] +from pyarrow._fs import FileSystem, SupportedFileSystem # type: ignore[unresolved_import] +from pyarrow._parquet import ( # type: ignore[unresolved_import] ColumnChunkMetaData, ColumnSchema, FileDecryptionProperties, @@ -29,9 +29,9 @@ from pyarrow._parquet import ( SortingColumn, Statistics, ) -from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow._stubs_typing import FilterTuple, SingleOrList # type: ignore[unresolved_import] from pyarrow.dataset import ParquetFileFragment, Partitioning -from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table # type: ignore[unresolved_import] from typing_extensions import deprecated __all__ = ( diff --git c/python/stubs/parquet/encryption.pyi i/python/stubs/parquet/encryption.pyi index 5a77dae7ef..daade78e6d 100644 --- c/python/stubs/parquet/encryption.pyi +++ i/python/stubs/parquet/encryption.pyi @@ -1,4 +1,4 @@ -from pyarrow._parquet_encryption import ( +from pyarrow._parquet_encryption import ( # type: ignore[unresolved_import] CryptoFactory, DecryptionConfiguration, EncryptionConfiguration, diff --git c/python/stubs/substrait.pyi i/python/stubs/substrait.pyi index a56a8a5b40..004439d4c1 100644 --- c/python/stubs/substrait.pyi +++ i/python/stubs/substrait.pyi @@ -1,4 +1,4 @@ -from pyarrow._substrait import ( +from pyarrow._substrait import ( # type: ignore[unresolved_import] BoundExpressions, SubstraitSchema, deserialize_expressions, diff --git c/python/stubs/types.pyi i/python/stubs/types.pyi index 0cb4f6171d..c128770d17 100644 --- c/python/stubs/types.pyi +++ i/python/stubs/types.pyi @@ -11,7 +11,7 @@ if sys.version_info >= (3, 10): else: from typing_extensions import TypeAlias -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] BinaryType, BinaryViewType, BoolType, --- python/pyarrow/__init__.py | 18 +- python/pyarrow/acero.py | 6 +- python/pyarrow/benchmark.py | 2 +- python/pyarrow/compute.py | 2 +- python/pyarrow/conftest.py | 6 +- python/pyarrow/csv.py | 2 +- python/pyarrow/cuda.py | 2 +- python/pyarrow/dataset.py | 8 +- python/pyarrow/feather.py | 9 +- python/pyarrow/flight.py | 2 +- python/pyarrow/fs.py | 10 +- python/pyarrow/ipc.py | 4 +- python/pyarrow/json.py | 2 +- python/pyarrow/orc.py | 4 +- python/pyarrow/pandas_compat.py | 10 +- python/pyarrow/parquet/core.py | 4 +- 
python/pyarrow/parquet/encryption.py | 2 +- python/pyarrow/substrait.py | 2 +- python/pyarrow/tests/test_builder.py | 3 +- python/pyarrow/tests/test_compute.py | 2 +- python/pyarrow/tests/test_cpp_internals.py | 2 +- .../pyarrow/tests/test_cuda_numba_interop.py | 3 +- python/pyarrow/tests/test_dataset.py | 2 +- python/pyarrow/tests/test_flight.py | 2 +- python/pyarrow/tests/test_fs.py | 2 +- python/pyarrow/tests/test_gandiva.py | 24 +-- python/pyarrow/tests/test_jvm.py | 4 +- python/pyarrow/tests/test_misc.py | 2 +- python/pyarrow/tests/test_sparse_tensor.py | 2 +- python/pyarrow/tests/test_substrait.py | 7 +- python/pyarrow/tests/test_udf.py | 2 +- python/pyarrow/types.py | 4 +- python/stubs/__init__.pyi | 18 +- python/stubs/__lib_pxi/array.pyi | 6 +- python/stubs/__lib_pxi/builder.pyi | 2 +- python/stubs/__lib_pxi/device.pyi | 2 +- python/stubs/__lib_pxi/io.pyi | 4 +- python/stubs/__lib_pxi/ipc.pyi | 4 +- python/stubs/__lib_pxi/memory.pyi | 2 +- python/stubs/__lib_pxi/pandas_shim.pyi | 2 +- python/stubs/__lib_pxi/scalar.pyi | 4 +- python/stubs/__lib_pxi/table.pyi | 13 +- python/stubs/__lib_pxi/tensor.pyi | 4 +- python/stubs/__lib_pxi/types.pyi | 5 +- python/stubs/_compute.pyi | 10 +- python/stubs/_fs.pyi | 2 + python/stubs/_parquet.pyi | 5 +- python/stubs/_s3fs.pyi | 5 +- python/stubs/_stubs_typing.pyi | 5 +- python/stubs/benchmark.pyi | 2 +- python/stubs/compute.pyi | 168 +++++++++--------- python/stubs/csv.pyi | 2 +- python/stubs/cuda.pyi | 2 +- python/stubs/dataset.pyi | 12 +- python/stubs/feather.pyi | 4 +- python/stubs/flight.pyi | 2 +- python/stubs/fs.pyi | 10 +- python/stubs/interchange/buffer.pyi | 2 +- python/stubs/interchange/column.pyi | 4 +- python/stubs/interchange/dataframe.pyi | 2 +- python/stubs/interchange/from_dataframe.pyi | 4 +- python/stubs/ipc.pyi | 4 +- python/stubs/json.pyi | 2 +- python/stubs/parquet/core.pyi | 12 +- python/stubs/parquet/encryption.pyi | 2 +- python/stubs/substrait.pyi | 2 +- python/stubs/types.pyi | 2 +- 67 files changed, 254 insertions(+), 233 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index da2fe966475..45aa2b619f8 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -58,8 +58,8 @@ def parse_git(root, **kwargs): except ImportError: __version__ = None -import pyarrow.lib as _lib -from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, +import pyarrow.lib as _lib # type: ignore[unresolved_import] +from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, # type: ignore[unresolved_import] MonthDayNano, VersionInfo, build_info, cpp_build_info, cpp_version, cpp_version_info, runtime_info, cpu_count, set_cpu_count, enable_signal_handlers, @@ -153,7 +153,7 @@ def print_entry(label, value): print(f" {codec: <20}: {status: <8}") -from pyarrow.lib import (null, bool_, +from pyarrow.lib import (null, bool_, # type: ignore[unresolved_import] int8, int16, int32, int64, uint8, uint16, uint32, uint64, time32, time64, timestamp, date32, date64, duration, @@ -237,13 +237,13 @@ def print_entry(label, value): FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar) # Buffers, allocation -from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager, +from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager, # type: ignore[unresolved_import] default_cpu_memory_manager) -from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, +from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, # 
type: ignore[unresolved_import] Codec, compress, decompress, allocate_buffer) -from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool, +from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool, # type: ignore[unresolved_import] total_allocated_bytes, set_memory_pool, default_memory_pool, system_memory_pool, jemalloc_memory_pool, mimalloc_memory_pool, @@ -252,7 +252,7 @@ def print_entry(label, value): supported_memory_backends) # I/O -from pyarrow.lib import (NativeFile, PythonFile, +from pyarrow.lib import (NativeFile, PythonFile, # type: ignore[unresolved_import] BufferedInputStream, BufferedOutputStream, CacheOptions, CompressedInputStream, CompressedOutputStream, TransformInputStream, transcoding_input_stream, @@ -263,12 +263,12 @@ def print_entry(label, value): input_stream, output_stream, have_libhdfs) -from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table, +from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table, # type: ignore[unresolved_import] concat_arrays, concat_tables, TableGroupBy, RecordBatchReader, concat_batches) # Exceptions -from pyarrow.lib import (ArrowCancelled, +from pyarrow.lib import (ArrowCancelled, # type: ignore[unresolved_import] ArrowCapacityError, ArrowException, ArrowKeyError, diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py index e475e8db5c2..dcead124d31 100644 --- a/python/pyarrow/acero.py +++ b/python/pyarrow/acero.py @@ -22,11 +22,11 @@ # distutils: language = c++ # cython: language_level = 3 -from pyarrow.lib import Table, RecordBatch, array +from pyarrow.lib import Table, RecordBatch, array # type: ignore[unresolved_import] from pyarrow.compute import Expression, field try: - from pyarrow._acero import ( # noqa + from pyarrow._acero import ( # type: ignore[unresolved_import] # noqa Declaration, ExecNodeOptions, TableSourceNodeOptions, @@ -45,7 +45,7 @@ try: import pyarrow.dataset as ds - from pyarrow._dataset import ScanNodeOptions + from pyarrow._dataset import ScanNodeOptions # type: ignore[unresolved_import] except ImportError: class DatasetModuleStub: class Dataset: diff --git a/python/pyarrow/benchmark.py b/python/pyarrow/benchmark.py index 25ee1141f08..c0ea1b0ec89 100644 --- a/python/pyarrow/benchmark.py +++ b/python/pyarrow/benchmark.py @@ -18,4 +18,4 @@ # flake8: noqa -from pyarrow.lib import benchmark_PandasObjectIsNull +from pyarrow.lib import benchmark_PandasObjectIsNull # type: ignore[unresolved_import] diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index fe0afdb0a87..52e2de0e484 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-from pyarrow._compute import ( # noqa +from pyarrow._compute import ( # type: ignore[unresolved_import] # noqa Function, FunctionOptions, FunctionRegistry, diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 41beaa14041..d1b1567389b 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -21,7 +21,7 @@ import pyarrow as pa from pyarrow import Codec from pyarrow import fs -from pyarrow.lib import is_threading_enabled +from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] from pyarrow.tests.util import windows_has_tzdata import sys @@ -120,13 +120,13 @@ pass try: - import fastparquet # noqa + import fastparquet # type: ignore[unresolved_import] # noqa defaults['fastparquet'] = True except ImportError: pass try: - import pyarrow.gandiva # noqa + import pyarrow.gandiva # type: ignore[unresolved_import] # noqa defaults['gandiva'] = True except ImportError: pass diff --git a/python/pyarrow/csv.py b/python/pyarrow/csv.py index 1ae197f9f20..76ab1c5e03d 100644 --- a/python/pyarrow/csv.py +++ b/python/pyarrow/csv.py @@ -16,7 +16,7 @@ # under the License. -from pyarrow._csv import ( # noqa +from pyarrow._csv import ( # type: ignore[unresolved_import] # noqa ReadOptions, ParseOptions, ConvertOptions, ISO8601, open_csv, read_csv, CSVStreamingReader, write_csv, WriteOptions, CSVWriter, InvalidRow) diff --git a/python/pyarrow/cuda.py b/python/pyarrow/cuda.py index 18c530d4afe..834096cfa30 100644 --- a/python/pyarrow/cuda.py +++ b/python/pyarrow/cuda.py @@ -18,7 +18,7 @@ # flake8: noqa -from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, +from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, # type: ignore[unresolved_import] HostBuffer, BufferReader, BufferWriter, new_host_buffer, serialize_record_batch, read_message, diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index ef4f7288723..1ab75f8a7fb 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -21,7 +21,7 @@ from pyarrow.util import _is_iterable, _stringify_path, _is_path_like try: - from pyarrow._dataset import ( # noqa + from pyarrow._dataset import ( # type: ignore[unresolved_import] # noqa CsvFileFormat, CsvFragmentScanOptions, JsonFileFormat, @@ -70,7 +70,7 @@ ) try: - from pyarrow._dataset_orc import OrcFileFormat + from pyarrow._dataset_orc import OrcFileFormat # type: ignore[unresolved_import] _orc_available = True except ImportError: pass @@ -82,7 +82,7 @@ ) try: - from pyarrow._dataset_parquet import ( # noqa + from pyarrow._dataset_parquet import ( # type: ignore[unresolved_import] # noqa ParquetDatasetFactory, ParquetFactoryOptions, ParquetFileFormat, @@ -98,7 +98,7 @@ try: - from pyarrow._dataset_parquet_encryption import ( # noqa + from pyarrow._dataset_parquet_encryption import ( # type: ignore[unresolved_import] # noqa ParquetDecryptionConfig, ParquetEncryptionConfig, ) diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py index 241c27706a6..28a5c2c5476 100644 --- a/python/pyarrow/feather.py +++ b/python/pyarrow/feather.py @@ -20,11 +20,12 @@ import os from pyarrow.pandas_compat import _pandas_api # noqa -from pyarrow.lib import (Codec, Table, # noqa +from pyarrow.lib import (Codec, Table, # type: ignore[unresolved_import] # noqa concat_tables, schema) -import pyarrow.lib as ext -from pyarrow import _feather -from pyarrow._feather import FeatherError # noqa: F401 +import pyarrow.lib as ext # type: ignore[unresolved_import] +from pyarrow import _feather # type: ignore[unresolved_import] +from 
pyarrow._feather import FeatherError \ + # type: ignore[unresolved_import] # noqa: F401 class FeatherDataset: diff --git a/python/pyarrow/flight.py b/python/pyarrow/flight.py index b1836907c67..d6c4602b45d 100644 --- a/python/pyarrow/flight.py +++ b/python/pyarrow/flight.py @@ -16,7 +16,7 @@ # under the License. try: - from pyarrow._flight import ( # noqa:F401 + from pyarrow._flight import ( # type: ignore[unresolved_import] # noqa:F401 connect, Action, ActionType, diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 157dbdf9380..c7f1b325c70 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -21,7 +21,7 @@ from pyarrow.util import _is_path_like, _stringify_path -from pyarrow._fs import ( # noqa +from pyarrow._fs import ( # type: ignore[unresolved_import] # noqa FileSelector, FileType, FileInfo, @@ -40,22 +40,22 @@ _not_imported = [] try: - from pyarrow._azurefs import AzureFileSystem # noqa + from pyarrow._azurefs import AzureFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("AzureFileSystem") try: - from pyarrow._hdfs import HadoopFileSystem # noqa + from pyarrow._hdfs import HadoopFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("HadoopFileSystem") try: - from pyarrow._gcsfs import GcsFileSystem # noqa + from pyarrow._gcsfs import GcsFileSystem # type: ignore[unresolved_import] # noqa except ImportError: _not_imported.append("GcsFileSystem") try: - from pyarrow._s3fs import ( # noqa + from pyarrow._s3fs import ( # type: ignore[unresolved_import] # noqa AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized, finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region) diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py index 4e236678788..39ec944b728 100644 --- a/python/pyarrow/ipc.py +++ b/python/pyarrow/ipc.py @@ -21,14 +21,14 @@ import pyarrow as pa -from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # noqa +from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # type: ignore[unresolved_import] # noqa Message, MessageReader, RecordBatchReader, _ReadPandasMixin, MetadataVersion, Alignment, read_message, read_record_batch, read_schema, read_tensor, write_tensor, get_record_batch_size, get_tensor_size) -import pyarrow.lib as lib +import pyarrow.lib as lib # type: ignore[unresolved_import] class RecordBatchStreamReader(lib._RecordBatchStreamReader): diff --git a/python/pyarrow/json.py b/python/pyarrow/json.py index 24e60461350..d4988a1b5ae 100644 --- a/python/pyarrow/json.py +++ b/python/pyarrow/json.py @@ -16,4 +16,4 @@ # under the License. 
-from pyarrow._json import ReadOptions, ParseOptions, read_json, open_json # noqa +from pyarrow._json import ReadOptions, ParseOptions, read_json, open_json # type: ignore[unresolved_import] # noqa diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py index 4e0d66ec665..03c6a48046e 100644 --- a/python/pyarrow/orc.py +++ b/python/pyarrow/orc.py @@ -19,8 +19,8 @@ from numbers import Integral import warnings -from pyarrow.lib import Table -import pyarrow._orc as _orc +from pyarrow.lib import Table # type: ignore[unresolved_import] +import pyarrow._orc as _orc # type: ignore[unresolved_import] from pyarrow.fs import _resolve_filesystem_and_path diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 7b9f5008a10..f284d411abf 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -35,7 +35,7 @@ except ImportError: np = None # type: ignore[assignment] import pyarrow as pa -from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa +from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # type: ignore[unresolved_import] # noqa _logical_type_map = {} @@ -729,7 +729,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= pandas Block """ - import pandas.core.internals as _int + import pandas.core.internals as _int # type: ignore[unresolved_import] block_arr = item.get('block', None) placement = item['placement'] @@ -806,7 +806,8 @@ def table_to_dataframe( result = pa.lib.table_to_blocks(options, table, categories, list(ext_columns_dtypes.keys())) if _pandas_api.is_ge_v3(): - from pandas.api.internals import create_dataframe_from_blocks + from pandas.api.internals import create_dataframe_from_blocks \ + # type: ignore[unresolved_import] blocks = [ _reconstruct_block( @@ -816,7 +817,8 @@ def table_to_dataframe( df = create_dataframe_from_blocks(blocks, index=index, columns=columns) return df else: - from pandas.core.internals import BlockManager + from pandas.core.internals import BlockManager \ + # type: ignore[unresolved_import] from pandas import DataFrame blocks = [ diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 8c1a2ae7822..7b6c57f9683 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -29,14 +29,14 @@ import pyarrow as pa try: - import pyarrow._parquet as _parquet + import pyarrow._parquet as _parquet # type: ignore[unresolved_import] except ImportError as exc: raise ImportError( "The pyarrow installation is not built with support " f"for the Parquet file format ({str(exc)})" ) from None -from pyarrow._parquet import (ParquetReader, Statistics, # noqa +from pyarrow._parquet import (ParquetReader, Statistics, # type: ignore[unresolved_import] # noqa FileMetaData, RowGroupMetaData, ColumnChunkMetaData, ParquetSchema, ColumnSchema, diff --git a/python/pyarrow/parquet/encryption.py b/python/pyarrow/parquet/encryption.py index df6eed913fa..43e3bce04e6 100644 --- a/python/pyarrow/parquet/encryption.py +++ b/python/pyarrow/parquet/encryption.py @@ -16,7 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from pyarrow._parquet_encryption import (CryptoFactory, # noqa +from pyarrow._parquet_encryption import (CryptoFactory, # type: ignore[unresolved_import] # noqa EncryptionConfiguration, DecryptionConfiguration, KmsConnectionConfig, diff --git a/python/pyarrow/substrait.py b/python/pyarrow/substrait.py index db2c3a96a19..7ddfa790cb6 100644 --- a/python/pyarrow/substrait.py +++ b/python/pyarrow/substrait.py @@ -16,7 +16,7 @@ # under the License. try: - from pyarrow._substrait import ( # noqa + from pyarrow._substrait import ( # type: ignore[unresolved_import] # noqa BoundExpressions, get_supported_functions, run_query, diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py index 9187a19b5fc..65ca1458d0c 100644 --- a/python/pyarrow/tests/test_builder.py +++ b/python/pyarrow/tests/test_builder.py @@ -19,7 +19,8 @@ import weakref import pyarrow as pa -from pyarrow.lib import StringBuilder, StringViewBuilder +from pyarrow.lib import StringBuilder, StringViewBuilder \ + # type: ignore[unresolved_import] def test_weakref(): diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 003fb5db41d..4ab0e632134 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -40,7 +40,7 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError +from pyarrow.lib import ArrowNotImplementedError # type: ignore[unresolved_import] try: import pyarrow.substrait as pas diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index 7508d8f0b98..359ef62b1f8 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -20,7 +20,7 @@ import pytest -from pyarrow._pyarrow_cpp_tests import get_cpp_tests +from pyarrow._pyarrow_cpp_tests import get_cpp_tests # type: ignore[unresolved_import] def inject_cpp_tests(ns): diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py index 3bd81d755f5..f211f0046f0 100644 --- a/python/pyarrow/tests/test_cuda_numba_interop.py +++ b/python/pyarrow/tests/test_cuda_numba_interop.py @@ -26,7 +26,8 @@ cuda = pytest.importorskip("pyarrow.cuda") nb_cuda = pytest.importorskip("numba.cuda") -from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402 +from numba.cuda.cudadrv.devicearray import DeviceNDArray \ + # type: ignore[unresolved_import] # noqa: E402 context_choices = None diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index d1cd3f6b8a1..d9a4d3df207 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -41,7 +41,7 @@ import pyarrow.feather import pyarrow.fs as fs import pyarrow.json -from pyarrow.lib import is_threading_enabled +from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, _configure_s3_limited_user, _filesystem_uri, change_cwd) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 600c6492780..0c0bc7089b9 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -37,7 +37,7 @@ import pytest import pyarrow as pa -from pyarrow.lib import IpcReadOptions, tobytes +from pyarrow.lib import IpcReadOptions, tobytes # type: ignore[unresolved_import] from pyarrow.util import find_free_port from pyarrow.tests import util diff --git 
a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index a5a10fa55c6..61dcb76b247 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -2168,7 +2168,7 @@ def test_fsspec_filesystem_from_uri(): def test_huggingface_filesystem_from_uri(): pytest.importorskip("fsspec") try: - from huggingface_hub import HfFileSystem + from huggingface_hub import HfFileSystem # type: ignore[unresolved_import] except ImportError: pytest.skip("huggingface_hub not installed") diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py index 80d119a4853..01a6d2151a0 100644 --- a/python/pyarrow/tests/test_gandiva.py +++ b/python/pyarrow/tests/test_gandiva.py @@ -23,7 +23,7 @@ @pytest.mark.gandiva def test_tree_exp_builder(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() @@ -65,7 +65,7 @@ def test_tree_exp_builder(): @pytest.mark.gandiva def test_table(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([pa.array([1.0, 2.0]), pa.array([3.0, 4.0])], ['a', 'b']) @@ -92,7 +92,7 @@ def test_table(): @pytest.mark.gandiva def test_filter(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([pa.array([1.0 * i for i in range(10000)])], ['a']) @@ -116,7 +116,7 @@ def test_filter(): @pytest.mark.gandiva def test_in_expr(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] arr = pa.array(["ga", "an", "nd", "di", "iv", "va"]) table = pa.Table.from_arrays([arr], ["a"]) @@ -154,7 +154,7 @@ def test_in_expr(): @pytest.mark.skip(reason="Gandiva C++ did not have *real* binary, " "time and date support.") def test_in_expr_todo(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] # TODO: Implement reasonable support for timestamp, time & date. 
# Current exceptions: # pyarrow.lib.ArrowException: ExpressionValidationError: @@ -227,7 +227,7 @@ def test_in_expr_todo(): @pytest.mark.gandiva def test_boolean(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] table = pa.Table.from_arrays([ pa.array([1., 31., 46., 3., 57., 44., 22.]), @@ -254,7 +254,7 @@ def test_boolean(): @pytest.mark.gandiva def test_literals(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() @@ -294,7 +294,7 @@ def test_literals(): @pytest.mark.gandiva def test_regex(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] elements = ["park", "sparkle", "bright spark and fire", "spark"] data = pa.array(elements, type=pa.string()) @@ -318,7 +318,7 @@ def test_regex(): @pytest.mark.gandiva def test_get_registered_function_signatures(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] signatures = gandiva.get_registered_function_signatures() assert type(signatures[0].return_type()) is pa.DataType @@ -328,7 +328,7 @@ def test_get_registered_function_signatures(): @pytest.mark.gandiva def test_filter_project(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] mpool = pa.default_memory_pool() # Create a table with some sample data array0 = pa.array([10, 12, -20, 5, 21, 29], pa.int32()) @@ -375,7 +375,7 @@ def test_filter_project(): @pytest.mark.gandiva def test_to_string(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() assert str(builder.make_literal(2.0, pa.float64()) @@ -395,7 +395,7 @@ def test_to_string(): @pytest.mark.gandiva def test_rejects_none(): - import pyarrow.gandiva as gandiva + import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] builder = gandiva.TreeExprBuilder() diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index d71380b8666..b048fcea9ee 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -76,8 +76,8 @@ def test_jvm_buffer(root_allocator): def test_jvm_buffer_released(root_allocator): - import jpype.imports # noqa - from java.lang import IllegalArgumentException + import jpype.imports # type: ignore[unresolved_import] # noqa + from java.lang import IllegalArgumentException # type: ignore[unresolved_import] jvm_buffer = root_allocator.buffer(8) jvm_buffer.release() diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 64f45d8bed8..09ac52588ed 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -22,7 +22,7 @@ import pytest import pyarrow as pa -from pyarrow.lib import ArrowInvalid +from pyarrow.lib import ArrowInvalid # type: ignore[unresolved_import] def test_get_include(): diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index 27974b80f80..e4d141e2a6f 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -32,7 +32,7 @@ scipy = None # type: ignore[assignment] try: - import sparse + import sparse # type: ignore[unresolved_import] except ImportError: sparse = None diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index fcd1c8d48c5..fae89d3cee5 
100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -22,8 +22,9 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import tobytes -from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError +from pyarrow.lib import tobytes # type: ignore[unresolved_import] +from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError \ + # type: ignore[unresolved_import] try: import pyarrow.substrait as substrait @@ -36,7 +37,7 @@ def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context + from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] return _get_udf_context(pa.default_memory_pool(), batch_length) diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py index dbc30867971..891295a5519 100644 --- a/python/pyarrow/tests/test_udf.py +++ b/python/pyarrow/tests/test_udf.py @@ -39,7 +39,7 @@ def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context + from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] return _get_udf_context(pa.default_memory_pool(), batch_length) diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index ab4e5d1b992..ee2b7e1440f 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -20,11 +20,11 @@ from enum import IntEnum -from pyarrow.lib import (is_boolean_value, # noqa +from pyarrow.lib import (is_boolean_value, # type: ignore[unresolved_import] # noqa is_integer_value, is_float_value) -import pyarrow.lib as lib +import pyarrow.lib as lib # type: ignore[unresolved_import] from pyarrow.util import doc diff --git a/python/stubs/__init__.pyi b/python/stubs/__init__.pyi index 8a0d1e870c5..0a1c49067c3 100644 --- a/python/stubs/__init__.pyi +++ b/python/stubs/__init__.pyi @@ -1,11 +1,11 @@ # ruff: noqa: F401, I001, E402 __version__: str -import pyarrow.lib as _lib +import pyarrow.lib as _lib # type: ignore[unresolved_import] _gc_enabled: bool -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] BuildInfo, RuntimeInfo, set_timezone_db_path, @@ -27,7 +27,7 @@ def show_info() -> None: ... def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... 
-from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] null, bool_, int8, @@ -233,9 +233,9 @@ from pyarrow.lib import ( ) # Buffers, allocation -from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager # type: ignore[unresolved_import] -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] Buffer, ResizableBuffer, foreign_buffer, @@ -246,7 +246,7 @@ from pyarrow.lib import ( allocate_buffer, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] MemoryPool, LoggingMemoryPool, ProxyMemoryPool, @@ -264,7 +264,7 @@ from pyarrow.lib import ( ) # I/O -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] NativeFile, PythonFile, BufferedInputStream, @@ -287,7 +287,7 @@ from pyarrow.lib import ( have_libhdfs, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] ChunkedArray, RecordBatch, Table, @@ -299,7 +299,7 @@ from pyarrow.lib import ( ) # Exceptions -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] ArrowCancelled, ArrowCapacityError, ArrowException, diff --git a/python/stubs/__lib_pxi/array.pyi b/python/stubs/__lib_pxi/array.pyi index ffdb8a9c075..37b397f6bb9 100644 --- a/python/stubs/__lib_pxi/array.pyi +++ b/python/stubs/__lib_pxi/array.pyi @@ -23,8 +23,8 @@ import numpy as np import pandas as pd from pandas.core.dtypes.base import ExtensionDtype -from pyarrow._compute import CastOptions -from pyarrow._stubs_typing import ( +from pyarrow._compute import CastOptions # type: ignore[unresolved_import] +from pyarrow._stubs_typing import ( # type: ignore[unresolved_import] ArrayLike, Indices, Mask, @@ -32,7 +32,7 @@ from pyarrow._stubs_typing import ( SupportArrowArray, SupportArrowDeviceArray, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] Buffer, Device, MemoryManager, diff --git a/python/stubs/__lib_pxi/builder.pyi b/python/stubs/__lib_pxi/builder.pyi index 4a0e9ca4708..655d6436da8 100644 --- a/python/stubs/__lib_pxi/builder.pyi +++ b/python/stubs/__lib_pxi/builder.pyi @@ -1,6 +1,6 @@ from typing import Iterable -from pyarrow.lib import MemoryPool, _Weakrefable +from pyarrow.lib import MemoryPool, _Weakrefable # type: ignore[unresolved_import] from .array import StringArray, StringViewArray diff --git a/python/stubs/__lib_pxi/device.pyi b/python/stubs/__lib_pxi/device.pyi index d1b9f39eedd..edcabdd796a 100644 --- a/python/stubs/__lib_pxi/device.pyi +++ b/python/stubs/__lib_pxi/device.pyi @@ -1,6 +1,6 @@ import enum -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] class DeviceAllocationType(enum.Flag): CPU = enum.auto() diff --git a/python/stubs/__lib_pxi/io.pyi b/python/stubs/__lib_pxi/io.pyi index 37c8aefb06b..488dbf163a7 100644 --- a/python/stubs/__lib_pxi/io.pyi +++ b/python/stubs/__lib_pxi/io.pyi @@ -17,8 +17,8 @@ else: from typing import Any, Literal, SupportsIndex, overload -from pyarrow._stubs_typing import Compression, SupportPyBuffer -from pyarrow.lib import MemoryPool, _Weakrefable +from pyarrow._stubs_typing import Compression, SupportPyBuffer # type: ignore[unresolved_import] +from pyarrow.lib import MemoryPool, _Weakrefable # type: ignore[unresolved_import] from .device import Device, DeviceAllocationType, MemoryManager from .types import 
KeyValueMetadata diff --git a/python/stubs/__lib_pxi/ipc.pyi b/python/stubs/__lib_pxi/ipc.pyi index 3d72892061e..13363e4447a 100644 --- a/python/stubs/__lib_pxi/ipc.pyi +++ b/python/stubs/__lib_pxi/ipc.pyi @@ -11,8 +11,8 @@ from typing import Iterable, Iterator, Literal, Mapping, NamedTuple import pandas as pd -from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer -from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable +from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer # type: ignore[unresolved_import] +from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable # type: ignore[unresolved_import] from .io import Buffer, Codec, NativeFile from .types import DictionaryMemo, KeyValueMetadata diff --git a/python/stubs/__lib_pxi/memory.pyi b/python/stubs/__lib_pxi/memory.pyi index 57a3bb4f1b3..c58bf20dd90 100644 --- a/python/stubs/__lib_pxi/memory.pyi +++ b/python/stubs/__lib_pxi/memory.pyi @@ -1,4 +1,4 @@ -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] class MemoryPool(_Weakrefable): """ diff --git a/python/stubs/__lib_pxi/pandas_shim.pyi b/python/stubs/__lib_pxi/pandas_shim.pyi index 29a8485d062..c8cebf765ad 100644 --- a/python/stubs/__lib_pxi/pandas_shim.pyi +++ b/python/stubs/__lib_pxi/pandas_shim.pyi @@ -1,5 +1,5 @@ from types import ModuleType -from typing import Any, Iterable, TypeGuard +from typing import Any, Iterable, TypeGuard # type: ignore[unresolved_import] import pandas diff --git a/python/stubs/__lib_pxi/scalar.pyi b/python/stubs/__lib_pxi/scalar.pyi index 81ab5012067..cfd4ee6f34a 100644 --- a/python/stubs/__lib_pxi/scalar.pyi +++ b/python/stubs/__lib_pxi/scalar.pyi @@ -16,8 +16,8 @@ from typing import Any, Generic, Iterator, Literal, Mapping, overload import numpy as np -from pyarrow._compute import CastOptions -from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable +from pyarrow._compute import CastOptions # type: ignore[unresolved_import] +from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable # type: ignore[unresolved_import] from typing_extensions import Protocol, TypeVar from . import types diff --git a/python/stubs/__lib_pxi/table.pyi b/python/stubs/__lib_pxi/table.pyi index fbcfb1ef745..1ce21b6ed27 100644 --- a/python/stubs/__lib_pxi/table.pyi +++ b/python/stubs/__lib_pxi/table.pyi @@ -31,7 +31,7 @@ import numpy as np import pandas as pd from numpy.typing import NDArray -from pyarrow._compute import ( +from pyarrow._compute import ( # type: ignore[unresolved_import] CastOptions, CountOptions, FunctionOptions, @@ -39,7 +39,7 @@ from pyarrow._compute import ( TDigestOptions, VarianceOptions, ) -from pyarrow._stubs_typing import ( +from pyarrow._stubs_typing import ( # type: ignore[unresolved_import] Indices, Mask, NullEncoding, @@ -49,12 +49,15 @@ from pyarrow._stubs_typing import ( SupportArrowDeviceArray, SupportArrowStream, ) -from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.compute import ArrayOrChunkedArray, Expression # type: ignore[unresolved_import] from pyarrow.interchange.dataframe import _PyArrowDataFrame -from pyarrow.lib import Device, Field, MemoryManager, MemoryPool, MonthDayNano, Schema +from pyarrow.lib import Device, Field, MemoryManager, MemoryPool, MonthDayNano, Schema # type: ignore[unresolved_import] from . 
import array, scalar, types -from .array import Array, NullableCollection, StructArray, _CastAs, _PandasConvertible +from .array import ( + Array, StructArray, _CastAs, _PandasConvertible, + NullableCollection, # type: ignore[unresolved_import] +) from .device import DeviceAllocationType from .io import Buffer from .ipc import RecordBatchReader diff --git a/python/stubs/__lib_pxi/tensor.pyi b/python/stubs/__lib_pxi/tensor.pyi index d849abd0f1f..a28804c6e36 100644 --- a/python/stubs/__lib_pxi/tensor.pyi +++ b/python/stubs/__lib_pxi/tensor.pyi @@ -7,9 +7,9 @@ else: import numpy as np -from pyarrow.lib import _Weakrefable +from pyarrow.lib import _Weakrefable # type: ignore[unresolved_import] from scipy.sparse import coo_matrix, csr_matrix -from sparse import COO +from sparse import COO # type: ignore[unresolved_import] class Tensor(_Weakrefable): """ diff --git a/python/stubs/__lib_pxi/types.pyi b/python/stubs/__lib_pxi/types.pyi index a7b6062b275..d38269ef341 100644 --- a/python/stubs/__lib_pxi/types.pyi +++ b/python/stubs/__lib_pxi/types.pyi @@ -14,8 +14,8 @@ from typing import Any, Generic, Iterable, Iterator, Literal, overload import numpy as np import pandas as pd -from pyarrow._stubs_typing import SupportArrowSchema -from pyarrow.lib import ( +from pyarrow._stubs_typing import SupportArrowSchema # type: ignore[unresolved_import] +from pyarrow.lib import ( # type: ignore[unresolved_import] Array, ChunkedArray, ExtensionArray, @@ -29,6 +29,7 @@ from .io import Buffer from .scalar import ExtensionScalar _AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) class _Weakrefable: ... class _Metadata(_Weakrefable): ... diff --git a/python/stubs/_compute.pyi b/python/stubs/_compute.pyi index 3d61ae42787..071fceb3928 100644 --- a/python/stubs/_compute.pyi +++ b/python/stubs/_compute.pyi @@ -1,12 +1,6 @@ from typing import ( - Any, - Callable, - Iterable, - Literal, - Sequence, - TypeAlias, - TypedDict, - overload, + Any, Callable, Iterable, Literal, Sequence, TypedDict, overload, + TypeAlias, # type: ignore[unresolved_import] ) from . 
import lib diff --git a/python/stubs/_fs.pyi b/python/stubs/_fs.pyi index 9f6e28dcf0f..d3b194e3ded 100644 --- a/python/stubs/_fs.pyi +++ b/python/stubs/_fs.pyi @@ -19,6 +19,8 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() diff --git a/python/stubs/_parquet.pyi b/python/stubs/_parquet.pyi index a9187df0428..053f2d08266 100644 --- a/python/stubs/_parquet.pyi +++ b/python/stubs/_parquet.pyi @@ -1,4 +1,7 @@ -from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict +from typing import ( + IO, Any, Iterable, Iterator, Literal, Sequence, TypedDict, + TypeAlias, # type: ignore[unresolved_import] +) from _typeshed import StrPath diff --git a/python/stubs/_s3fs.pyi b/python/stubs/_s3fs.pyi index fc13c498bd9..8e67c805619 100644 --- a/python/stubs/_s3fs.pyi +++ b/python/stubs/_s3fs.pyi @@ -1,6 +1,9 @@ import enum -from typing import Literal, NotRequired, Required, TypedDict +from typing import ( + Literal, TypedDict, + NotRequired, Required, # type: ignore[unresolved_import] +) from ._fs import FileSystem from .lib import KeyValueMetadata diff --git a/python/stubs/_stubs_typing.pyi b/python/stubs/_stubs_typing.pyi index 40d931d24ed..73bb9f38a95 100644 --- a/python/stubs/_stubs_typing.pyi +++ b/python/stubs/_stubs_typing.pyi @@ -2,7 +2,10 @@ import datetime as dt from collections.abc import Sequence from decimal import Decimal -from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar, Union +from typing import ( + Any, Collection, Literal, Protocol, TypeVar, Union, + TypeAlias # type: ignore[unresolved_import] +) import numpy as np diff --git a/python/stubs/benchmark.pyi b/python/stubs/benchmark.pyi index 048973301dc..972fad10a5f 100644 --- a/python/stubs/benchmark.pyi +++ b/python/stubs/benchmark.pyi @@ -1,3 +1,3 @@ -from pyarrow.lib import benchmark_PandasObjectIsNull +from pyarrow.lib import benchmark_PandasObjectIsNull # type: ignore[unresolved_import] __all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi index 1cf52ff07ca..775b7fa504e 100644 --- a/python/stubs/compute.pyi +++ b/python/stubs/compute.pyi @@ -1,94 +1,100 @@ # ruff: noqa: I001 -from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence # type: ignore[unresolved_import] from collections.abc import Callable # Option classes -from pyarrow._compute import ArraySortOptions as ArraySortOptions -from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions -from pyarrow._compute import CastOptions as CastOptions -from pyarrow._compute import CountOptions as CountOptions -from pyarrow._compute import CumulativeOptions as CumulativeOptions -from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions -from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions -from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions -from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions +from pyarrow._compute import ( # type: ignore[unresolved_import] + ArraySortOptions as ArraySortOptions, + AssumeTimezoneOptions as AssumeTimezoneOptions, + CastOptions as CastOptions, + CountOptions as CountOptions, + CumulativeOptions as 
CumulativeOptions, + CumulativeSumOptions as CumulativeSumOptions, + DayOfWeekOptions as DayOfWeekOptions, + DictionaryEncodeOptions as DictionaryEncodeOptions, + ElementWiseAggregateOptions as ElementWiseAggregateOptions, +) # Expressions -from pyarrow._compute import Expression as Expression -from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions -from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions -from pyarrow._compute import FilterOptions as FilterOptions -from pyarrow._compute import Function as Function -from pyarrow._compute import FunctionOptions as FunctionOptions -from pyarrow._compute import FunctionRegistry as FunctionRegistry -from pyarrow._compute import HashAggregateFunction as HashAggregateFunction -from pyarrow._compute import HashAggregateKernel as HashAggregateKernel -from pyarrow._compute import IndexOptions as IndexOptions -from pyarrow._compute import JoinOptions as JoinOptions -from pyarrow._compute import Kernel as Kernel -from pyarrow._compute import ListFlattenOptions as ListFlattenOptions -from pyarrow._compute import ListSliceOptions as ListSliceOptions -from pyarrow._compute import MakeStructOptions as MakeStructOptions -from pyarrow._compute import MapLookupOptions as MapLookupOptions -from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions -from pyarrow._compute import ModeOptions as ModeOptions -from pyarrow._compute import NullOptions as NullOptions -from pyarrow._compute import PadOptions as PadOptions -from pyarrow._compute import PairwiseOptions as PairwiseOptions -from pyarrow._compute import PartitionNthOptions as PartitionNthOptions -from pyarrow._compute import PivotWiderOptions as PivotWiderOptions -from pyarrow._compute import QuantileOptions as QuantileOptions -from pyarrow._compute import RandomOptions as RandomOptions -from pyarrow._compute import RankOptions as RankOptions -from pyarrow._compute import RankQuantileOptions as RankQuantileOptions -from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions -from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions -from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions -from pyarrow._compute import RoundOptions as RoundOptions -from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions -from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions -from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions -from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction -from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel -from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions -from pyarrow._compute import ScalarFunction as ScalarFunction -from pyarrow._compute import ScalarKernel as ScalarKernel -from pyarrow._compute import SelectKOptions as SelectKOptions -from pyarrow._compute import SetLookupOptions as SetLookupOptions -from pyarrow._compute import SkewOptions as SkewOptions -from pyarrow._compute import SliceOptions as SliceOptions -from pyarrow._compute import SortOptions as SortOptions -from pyarrow._compute import SplitOptions as SplitOptions -from pyarrow._compute import SplitPatternOptions as SplitPatternOptions -from pyarrow._compute import StrftimeOptions as StrftimeOptions -from pyarrow._compute import StrptimeOptions as StrptimeOptions -from pyarrow._compute import StructFieldOptions as StructFieldOptions -from pyarrow._compute import TakeOptions as TakeOptions 
-from pyarrow._compute import TDigestOptions as TDigestOptions -from pyarrow._compute import TrimOptions as TrimOptions -from pyarrow._compute import UdfContext as UdfContext -from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions -from pyarrow._compute import VarianceOptions as VarianceOptions -from pyarrow._compute import VectorFunction as VectorFunction -from pyarrow._compute import VectorKernel as VectorKernel -from pyarrow._compute import WeekOptions as WeekOptions -from pyarrow._compute import WinsorizeOptions as WinsorizeOptions +from pyarrow._compute import ( # type: ignore[unresolved_import] + Expression as Expression, + ExtractRegexOptions as ExtractRegexOptions, + ExtractRegexSpanOptions as ExtractRegexSpanOptions, + FilterOptions as FilterOptions, + Function as Function, + FunctionOptions as FunctionOptions, + FunctionRegistry as FunctionRegistry, + HashAggregateFunction as HashAggregateFunction, + HashAggregateKernel as HashAggregateKernel, + IndexOptions as IndexOptions, + JoinOptions as JoinOptions, + Kernel as Kernel, + ListFlattenOptions as ListFlattenOptions, + ListSliceOptions as ListSliceOptions, + MakeStructOptions as MakeStructOptions, + MapLookupOptions as MapLookupOptions, + MatchSubstringOptions as MatchSubstringOptions, + ModeOptions as ModeOptions, + NullOptions as NullOptions, + PadOptions as PadOptions, + PairwiseOptions as PairwiseOptions, + PartitionNthOptions as PartitionNthOptions, + PivotWiderOptions as PivotWiderOptions, + QuantileOptions as QuantileOptions, + RandomOptions as RandomOptions, + RankOptions as RankOptions, + RankQuantileOptions as RankQuantileOptions, + ReplaceSliceOptions as ReplaceSliceOptions, + ReplaceSubstringOptions as ReplaceSubstringOptions, + RoundBinaryOptions as RoundBinaryOptions, + RoundOptions as RoundOptions, + RoundTemporalOptions as RoundTemporalOptions, + RoundToMultipleOptions as RoundToMultipleOptions, + RunEndEncodeOptions as RunEndEncodeOptions, + ScalarAggregateFunction as ScalarAggregateFunction, + ScalarAggregateKernel as ScalarAggregateKernel, + ScalarAggregateOptions as ScalarAggregateOptions, + ScalarFunction as ScalarFunction, + ScalarKernel as ScalarKernel, + SelectKOptions as SelectKOptions, + SetLookupOptions as SetLookupOptions, + SkewOptions as SkewOptions, + SliceOptions as SliceOptions, + SortOptions as SortOptions, + SplitOptions as SplitOptions, + SplitPatternOptions as SplitPatternOptions, + StrftimeOptions as StrftimeOptions, + StrptimeOptions as StrptimeOptions, + StructFieldOptions as StructFieldOptions, + TakeOptions as TakeOptions, + TDigestOptions as TDigestOptions, + TrimOptions as TrimOptions, + UdfContext as UdfContext, + Utf8NormalizeOptions as Utf8NormalizeOptions, + VarianceOptions as VarianceOptions, + VectorFunction as VectorFunction, + VectorKernel as VectorKernel, + WeekOptions as WeekOptions, + WinsorizeOptions as WinsorizeOptions, +) # Functions -from pyarrow._compute import call_function as call_function +from pyarrow._compute import call_function as call_function # type: ignore[unresolved_import] # Udf -from pyarrow._compute import call_tabular_function as call_tabular_function -from pyarrow._compute import function_registry as function_registry -from pyarrow._compute import get_function as get_function -from pyarrow._compute import list_functions as list_functions -from pyarrow._compute import register_aggregate_function as register_aggregate_function -from pyarrow._compute import register_scalar_function as register_scalar_function -from pyarrow._compute import 
register_tabular_function as register_tabular_function -from pyarrow._compute import register_vector_function as register_vector_function - -from pyarrow._compute import _Order, _Placement -from pyarrow._stubs_typing import ArrayLike, ScalarLike +from pyarrow._compute import ( # type: ignore[unresolved_import] + call_tabular_function as call_tabular_function, + function_registry as function_registry, + get_function as get_function, + list_functions as list_functions, + register_aggregate_function as register_aggregate_function, + register_scalar_function as register_scalar_function, + register_tabular_function as register_tabular_function, + register_vector_function as register_vector_function, +) + +from pyarrow._compute import _Order, _Placement # type: ignore[unresolved_import] +from pyarrow._stubs_typing import ArrayLike, ScalarLike # type: ignore[unresolved_import] from . import lib _P = ParamSpec("_P") diff --git a/python/stubs/csv.pyi b/python/stubs/csv.pyi index 510229d7e72..cea5542d1c5 100644 --- a/python/stubs/csv.pyi +++ b/python/stubs/csv.pyi @@ -1,4 +1,4 @@ -from pyarrow._csv import ( +from pyarrow._csv import ( # type: ignore[unresolved_import] ISO8601, ConvertOptions, CSVStreamingReader, diff --git a/python/stubs/cuda.pyi b/python/stubs/cuda.pyi index e11baf7d4e7..3c69e746f7b 100644 --- a/python/stubs/cuda.pyi +++ b/python/stubs/cuda.pyi @@ -1,4 +1,4 @@ -from pyarrow._cuda import ( +from pyarrow._cuda import ( # type: ignore[unresolved_import] BufferReader, BufferWriter, Context, diff --git a/python/stubs/dataset.pyi b/python/stubs/dataset.pyi index 98f1a38aa85..a57e9f2f3f0 100644 --- a/python/stubs/dataset.pyi +++ b/python/stubs/dataset.pyi @@ -1,7 +1,7 @@ -from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload +from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload # type: ignore[unresolved_import] from _typeshed import StrPath -from pyarrow._dataset import ( +from pyarrow._dataset import ( # type: ignore[unresolved_import] CsvFileFormat, CsvFragmentScanOptions, Dataset, @@ -32,8 +32,8 @@ from pyarrow._dataset import ( WrittenFile, get_partition_keys, ) -from pyarrow._dataset_orc import OrcFileFormat -from pyarrow._dataset_parquet import ( +from pyarrow._dataset_orc import OrcFileFormat # type: ignore[unresolved_import] +from pyarrow._dataset_parquet import ( # type: ignore[unresolved_import] ParquetDatasetFactory, ParquetFactoryOptions, ParquetFileFormat, @@ -43,12 +43,12 @@ from pyarrow._dataset_parquet import ( ParquetReadOptions, RowGroupInfo, ) -from pyarrow._dataset_parquet_encryption import ( +from pyarrow._dataset_parquet_encryption import ( # type: ignore[unresolved_import] ParquetDecryptionConfig, ParquetEncryptionConfig, ) from pyarrow.compute import Expression, field, scalar -from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table +from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table # type: ignore[unresolved_import] from ._fs import SupportedFileSystem diff --git a/python/stubs/feather.pyi b/python/stubs/feather.pyi index 9451ee15763..63766cd5d61 100644 --- a/python/stubs/feather.pyi +++ b/python/stubs/feather.pyi @@ -3,8 +3,8 @@ from typing import IO, Literal import pandas as pd from _typeshed import StrPath -from pyarrow._feather import FeatherError -from pyarrow.lib import Table +from pyarrow._feather import FeatherError # type: ignore[unresolved_import] +from pyarrow.lib import Table # type: ignore[unresolved_import] __all__ = [ "FeatherError", diff --git 
a/python/stubs/flight.pyi b/python/stubs/flight.pyi index 9b806ccf305..aa06f3ebec7 100644 --- a/python/stubs/flight.pyi +++ b/python/stubs/flight.pyi @@ -1,4 +1,4 @@ -from pyarrow._flight import ( +from pyarrow._flight import ( # type: ignore[unresolved_import] Action, ActionType, BasicAuth, diff --git a/python/stubs/fs.pyi b/python/stubs/fs.pyi index 6bf75616c13..07a1d7765e6 100644 --- a/python/stubs/fs.pyi +++ b/python/stubs/fs.pyi @@ -1,4 +1,4 @@ -from pyarrow._fs import ( # noqa +from pyarrow._fs import ( # type: ignore[unresolved_import] # noqa FileSelector, FileType, FileInfo, @@ -10,10 +10,10 @@ from pyarrow._fs import ( # noqa PyFileSystem, SupportedFileSystem, ) -from pyarrow._azurefs import AzureFileSystem -from pyarrow._hdfs import HadoopFileSystem -from pyarrow._gcsfs import GcsFileSystem -from pyarrow._s3fs import ( # noqa +from pyarrow._azurefs import AzureFileSystem # type: ignore[unresolved_import] +from pyarrow._hdfs import HadoopFileSystem # type: ignore[unresolved_import] +from pyarrow._gcsfs import GcsFileSystem # type: ignore[unresolved_import] +from pyarrow._s3fs import ( # type: ignore[unresolved_import] # noqa AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, S3FileSystem, diff --git a/python/stubs/interchange/buffer.pyi b/python/stubs/interchange/buffer.pyi index 46673961a75..afef5acf353 100644 --- a/python/stubs/interchange/buffer.pyi +++ b/python/stubs/interchange/buffer.pyi @@ -1,6 +1,6 @@ import enum -from pyarrow.lib import Buffer +from pyarrow.lib import Buffer # type: ignore[unresolved_import] class DlpackDeviceType(enum.IntEnum): """Integer enum for device type codes matching DLPack.""" diff --git a/python/stubs/interchange/column.pyi b/python/stubs/interchange/column.pyi index e6662867b6b..7d89c4ae6b0 100644 --- a/python/stubs/interchange/column.pyi +++ b/python/stubs/interchange/column.pyi @@ -1,8 +1,8 @@ import enum -from typing import Any, Iterable, TypeAlias, TypedDict +from typing import Any, Iterable, TypeAlias, TypedDict # type: ignore[unresolved_import] -from pyarrow.lib import Array, ChunkedArray +from pyarrow.lib import Array, ChunkedArray # type: ignore[unresolved_import] from .buffer import _PyArrowBuffer diff --git a/python/stubs/interchange/dataframe.pyi b/python/stubs/interchange/dataframe.pyi index 526a58926a9..7a17dfeb1eb 100644 --- a/python/stubs/interchange/dataframe.pyi +++ b/python/stubs/interchange/dataframe.pyi @@ -7,7 +7,7 @@ else: from typing import Any, Iterable, Sequence from pyarrow.interchange.column import _PyArrowColumn -from pyarrow.lib import RecordBatch, Table +from pyarrow.lib import RecordBatch, Table # type: ignore[unresolved_import] class _PyArrowDataFrame: """ diff --git a/python/stubs/interchange/from_dataframe.pyi b/python/stubs/interchange/from_dataframe.pyi index b04b6268975..ad461270f56 100644 --- a/python/stubs/interchange/from_dataframe.pyi +++ b/python/stubs/interchange/from_dataframe.pyi @@ -1,6 +1,6 @@ -from typing import Any, Protocol, TypeAlias +from typing import Any, Protocol, TypeAlias # type: ignore[unresolved_import] -from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table # type: ignore[unresolved_import] from .column import ( ColumnBuffers, diff --git a/python/stubs/ipc.pyi b/python/stubs/ipc.pyi index c7f2af004d4..2a5e8294e46 100644 --- a/python/stubs/ipc.pyi +++ b/python/stubs/ipc.pyi @@ -1,9 +1,9 @@ from io import IOBase import pandas as pd -import pyarrow.lib as lib +import 
pyarrow.lib as lib # type: ignore[unresolved_import] -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] IpcReadOptions, IpcWriteOptions, Message, diff --git a/python/stubs/json.pyi b/python/stubs/json.pyi index db1d35e0b8b..97b94d5dd77 100644 --- a/python/stubs/json.pyi +++ b/python/stubs/json.pyi @@ -1,3 +1,3 @@ -from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json # type: ignore[unresolved_import] __all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/stubs/parquet/core.pyi b/python/stubs/parquet/core.pyi index 56b2c8447d9..01dce442feb 100644 --- a/python/stubs/parquet/core.pyi +++ b/python/stubs/parquet/core.pyi @@ -13,10 +13,10 @@ if sys.version_info >= (3, 10): else: from typing_extensions import TypeAlias -from pyarrow import _parquet -from pyarrow._compute import Expression -from pyarrow._fs import FileSystem, SupportedFileSystem -from pyarrow._parquet import ( +from pyarrow import _parquet # type: ignore[unresolved_import] +from pyarrow._compute import Expression # type: ignore[unresolved_import] +from pyarrow._fs import FileSystem, SupportedFileSystem # type: ignore[unresolved_import] +from pyarrow._parquet import ( # type: ignore[unresolved_import] ColumnChunkMetaData, ColumnSchema, FileDecryptionProperties, @@ -29,9 +29,9 @@ from pyarrow._parquet import ( SortingColumn, Statistics, ) -from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow._stubs_typing import FilterTuple, SingleOrList # type: ignore[unresolved_import] from pyarrow.dataset import ParquetFileFragment, Partitioning -from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table # type: ignore[unresolved_import] from typing_extensions import deprecated __all__ = ( diff --git a/python/stubs/parquet/encryption.pyi b/python/stubs/parquet/encryption.pyi index 5a77dae7ef7..daade78e6dd 100644 --- a/python/stubs/parquet/encryption.pyi +++ b/python/stubs/parquet/encryption.pyi @@ -1,4 +1,4 @@ -from pyarrow._parquet_encryption import ( +from pyarrow._parquet_encryption import ( # type: ignore[unresolved_import] CryptoFactory, DecryptionConfiguration, EncryptionConfiguration, diff --git a/python/stubs/substrait.pyi b/python/stubs/substrait.pyi index a56a8a5b40f..004439d4c19 100644 --- a/python/stubs/substrait.pyi +++ b/python/stubs/substrait.pyi @@ -1,4 +1,4 @@ -from pyarrow._substrait import ( +from pyarrow._substrait import ( # type: ignore[unresolved_import] BoundExpressions, SubstraitSchema, deserialize_expressions, diff --git a/python/stubs/types.pyi b/python/stubs/types.pyi index 0cb4f6171d3..c128770d178 100644 --- a/python/stubs/types.pyi +++ b/python/stubs/types.pyi @@ -11,7 +11,7 @@ if sys.version_info >= (3, 10): else: from typing_extensions import TypeAlias -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[unresolved_import] BinaryType, BinaryViewType, BoolType, From a0d5b743c3c56c435f6b06699fc54249070961df Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 22:21:43 +0200 Subject: [PATCH 19/32] fix unsupported-operator --- python/pyarrow/pandas_compat.py | 3 +- .../interchange/test_interchange_spec.py | 6 +- python/pyarrow/tests/parquet/test_basic.py | 1 - .../tests/parquet/test_parquet_file.py | 2 +- python/pyarrow/tests/test_array.py | 3 +- python/pyarrow/tests/test_cython.py | 4 +- python/pyarrow/tests/test_extension_type.py | 4 +- 
python/pyarrow/tests/test_flight.py | 14 +++- python/pyarrow/tests/test_jvm.py | 5 +- python/pyarrow/tests/test_pandas.py | 6 +- python/pyproject.toml | 4 +- python/setup.py | 8 +- python/stubs/_stubs_typing.pyi | 4 +- python/stubs/compute.pyi | 74 +++++++++---------- 14 files changed, 77 insertions(+), 61 deletions(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index f284d411abf..8dcfb282b31 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -828,7 +828,8 @@ def table_to_dataframe( axes = [columns, index] mgr = BlockManager(blocks, axes) if _pandas_api.is_ge_v21(): - df = DataFrame._from_mgr(mgr, mgr.axes) # type: ignore[unresolved-attribute] + # type: ignore[unresolved-attribute] + df = DataFrame._from_mgr(mgr, mgr.axes) else: df = DataFrame(mgr) return df diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py index 56a424fd57a..68afc0c633b 100644 --- a/python/pyarrow/tests/interchange/test_interchange_spec.py +++ b/python/pyarrow/tests/interchange/test_interchange_spec.py @@ -18,15 +18,15 @@ import ctypes import hypothesis as h import hypothesis.strategies as st - +import pyarrow as pa +import pyarrow.tests.strategies as past import pytest + np = None try: import numpy as np except ImportError: pass -import pyarrow as pa -import pyarrow.tests.strategies as past all_types = st.deferred( diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 528f8e51683..18381538211 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -16,7 +16,6 @@ # under the License. import os -from collections import OrderedDict import io import warnings from shutil import copytree diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 28f25ac8482..df5b82ad8d9 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -408,7 +408,7 @@ def test_parquet_file_hugginface_support(): pytest.skip("fsspec is not installed, skipping Hugging Face test") fake_hf_module = types.ModuleType("huggingface_hub") - fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[unresolved-attribute] + fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[unresolved-attribute] with mock.patch.dict("sys.modules", {"huggingface_hub": fake_hf_module}): uri = "hf://datasets/apache/arrow/test.parquet" table = pa.table({"a": range(10)}) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index a06e3f76570..a1377d0c839 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -551,7 +551,8 @@ def test_arange(): result = pa.arange(*case) result.validate(full=True) - assert result.equals(pa.array(list(range(*case)), type=pa.int64())) # type: ignore[no-matching-overload] + # type: ignore[no-matching-overload] + assert result.equals(pa.array(list(range(*case)), type=pa.int64())) # Validate memory_pool keyword argument result = pa.arange(-1, 101, memory_pool=pa.default_memory_pool()) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index fdacb16be29..c9c35087839 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -191,7 +191,7 @@ def test_visit_strings(tmpdir): strings = ['a', 'b', 'c'] visited = [] - 
mod._visit_strings(strings, visited.append) # type: ignore[unresolved-attribute] + mod._visit_strings(strings, visited.append) # type: ignore[unresolved-attribute] assert visited == strings @@ -200,4 +200,4 @@ def raise_on_b(s): if s == 'b': raise ValueError('wtf') - mod._visit_strings(strings, raise_on_b) # type: ignore[unresolved-attribute] + mod._visit_strings(strings, raise_on_b) # type: ignore[unresolved-attribute] diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 1a851611b14..a3847c44e4f 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1353,11 +1353,11 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') - uuid_type = mod._make_uuid_type() # type: ignore[unresolved-attribute] + uuid_type = mod._make_uuid_type() # type: ignore[unresolved-attribute] assert uuid_type.extension_name == "example-uuid" assert uuid_type.storage_type == pa.binary(16) - array = mod._make_uuid_array() # type: ignore[unresolved-attribute] + array = mod._make_uuid_array() # type: ignore[unresolved-attribute] assert array.type == uuid_type assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 0c0bc7089b9..b33ae005331 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -53,29 +53,39 @@ class context_like(object): def __enter__(self): return self + def __exit__(self, exc_type, exc_value, traceback): pass flight = None + class MockContextManager: def __init__(self, *args, **kwargs): - pass + pass + def __enter__(self): return self + def __exit__(self, exc_type, exc_val, exc_tb): pass + class FlightServerBase(MockContextManager): def serve(self): pass + class FlightClient(MockContextManager): def get_flight_info(self, **kwargs): pass + def do_action(self, **kwargs): pass + def do_get(self, **kwargs): pass + def do_put(self, **kwargs): pass + def close(self): pass ServerAuthHandler, ClientAuthHandler = object, object @@ -1770,7 +1780,7 @@ def test_flight_do_put_limit(): with pytest.raises(flight.FlightWriteSizeExceededError, match="exceeded soft limit") as excinfo: writer.write_batch(large_batch) - assert excinfo.value.limit == 4096 # type: ignore[unresolved-attribute] + assert excinfo.value.limit == 4096 # type: ignore[unresolved-attribute] smaller_batches = [ large_batch.slice(0, 384), large_batch.slice(384), diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index b048fcea9ee..876c05d740a 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -16,6 +16,7 @@ # under the License. 
from json import dumps as json_dumps +from json import loads as json_loads import os import pyarrow as pa import pyarrow.jvm as pa_jvm @@ -171,7 +172,7 @@ def test_jvm_types(root_allocator, pa_type, jvm_spec, nullable): spec = { 'name': 'field_name', 'nullable': nullable, - 'type': json.loads(jvm_spec), + 'type': json_loads(jvm_spec), # TODO: This needs to be set for complex types 'children': [] } @@ -375,7 +376,7 @@ def test_jvm_record_batch(root_allocator, pa_type, py_data, jvm_type, spec = { 'name': 'field_name', 'nullable': False, - 'type': json.loads(jvm_spec), + 'type': json_loads(jvm_spec), # TODO: This needs to be set for complex types 'children': [] } diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 535b95515dc..1bd5b58025d 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4428,7 +4428,8 @@ def test_convert_to_extension_array(monkeypatch): integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: ignore[unresolved-attribute] + # type: ignore[unresolved-attribute] + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() assert len(_get_mgr(result).blocks) == 1 @@ -4474,7 +4475,8 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: ignore[unresolved-attribute] + # type: ignore[unresolved-attribute] + pd.core.arrays.integer.NumericDtype, "__from_arrow__") result = arr.to_pandas() assert _get_mgr(result).blocks[0].values.dtype == np.dtype("int64") diff --git a/python/pyproject.toml b/python/pyproject.toml index 5c0580a0510..a35a73911c2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -115,6 +115,6 @@ unresolved-attribute = "ignore" unresolved-global = "ignore" unresolved-import = "ignore" unresolved-reference = "ignore" -unsupported-operator = "ignore" +#unsupported-operator = "ignore" missing-argument = "ignore" -call-non-callable = "ignore" +#call-non-callable = "ignore" diff --git a/python/setup.py b/python/setup.py index 4e87ecfbfcc..d037b82f4ad 100755 --- a/python/setup.py +++ b/python/setup.py @@ -44,11 +44,12 @@ # as here it may be set to the host not target platform is_emscripten = ( sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 # type: ignore[possibly-unbound] + # type: ignore[possibly-unbound] + and sysconfig.get_config_var("SOABI").find("emscripten") != -1 ) -if Cython.__version__ < '3': # type: ignore[unresolved-attribute] +if Cython.__version__ < '3': # type: ignore[unresolved-attribute] raise Exception( 'Please update your Cython version. 
Supported Cython >= 3') @@ -254,7 +255,8 @@ def _run_cmake(self): if os.path.isfile('CMakeCache.txt'): cachefile = open('CMakeCache.txt', 'r') cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) # type: ignore[possibly-unbound-attribute] + # type: ignore[possibly-unbound-attribute] + cachefile.read()).group(1) cachefile.close() if (cachedir != build_temp): build_base = pjoin(saved_cwd, build_cmd.build_base) diff --git a/python/stubs/_stubs_typing.pyi b/python/stubs/_stubs_typing.pyi index 73bb9f38a95..3529290ff17 100644 --- a/python/stubs/_stubs_typing.pyi +++ b/python/stubs/_stubs_typing.pyi @@ -31,8 +31,8 @@ Compression: TypeAlias = Literal[ ] NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] -Mask: TypeAlias = Sequence[bool | None] | NDArray[np.bool_] | BooleanArray -Indices: TypeAlias = Sequence[int] | NDArray[np.integer[Any]] | IntegerArray +Mask: TypeAlias = Union[Sequence[bool | None], NDArray[np.bool_], BooleanArray] +Indices: TypeAlias = Union[Sequence[int], NDArray[np.integer[Any]], IntegerArray] PyScalar: TypeAlias = Union[ bool, int, float, Decimal, str, bytes, dt.date, dt.datetime, dt.time, dt.timedelta ] diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi index 775b7fa504e..4788837eeb9 100644 --- a/python/stubs/compute.pyi +++ b/python/stubs/compute.pyi @@ -171,54 +171,54 @@ _Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True) _ScalarT = TypeVar("_ScalarT", bound=lib.Scalar) _ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray) _ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray) -ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT] +ArrayOrChunkedArray: TypeAlias = Union[lib.Array[_Scalar_CoT], lib.ChunkedArray[_Scalar_CoT]] ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT -SignedIntegerScalar: TypeAlias = ( - lib.Scalar[lib.Int8Type] - | lib.Scalar[lib.Int16Type] - | lib.Scalar[lib.Int32Type] - | lib.Scalar[lib.Int64Type] -) -UnsignedIntegerScalar: TypeAlias = ( - lib.Scalar[lib.UInt8Type] - | lib.Scalar[lib.UInt16Type] - | lib.Scalar[lib.Uint32Type] - | lib.Scalar[lib.UInt64Type] -) +SignedIntegerScalar: TypeAlias = Union[ + lib.Scalar[lib.Int8Type], + lib.Scalar[lib.Int16Type], + lib.Scalar[lib.Int32Type], + lib.Scalar[lib.Int64Type], +] +UnsignedIntegerScalar: TypeAlias = Union[ + lib.Scalar[lib.UInt8Type], + lib.Scalar[lib.UInt16Type], + lib.Scalar[lib.Uint32Type], + lib.Scalar[lib.UInt64Type], +] IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar -FloatScalar: TypeAlias = ( - lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type] -) -DecimalScalar: TypeAlias = ( - lib.Scalar[lib.Decimal32Type] - | lib.Scalar[lib.Decimal64Type] - | lib.Scalar[lib.Decimal128Type] - | lib.Scalar[lib.Decimal256Type] -) +FloatScalar: TypeAlias = Union[ + lib.Scalar[lib.Float16Type], lib.Scalar[lib.Float32Type], lib.Scalar[lib.Float64Type], +] +DecimalScalar: TypeAlias = Union[ + lib.Scalar[lib.Decimal32Type], + lib.Scalar[lib.Decimal64Type], + lib.Scalar[lib.Decimal128Type], + lib.Scalar[lib.Decimal256Type], +] NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar -BinaryScalar: TypeAlias = ( - lib.Scalar[lib.BinaryType] - | lib.Scalar[lib.LargeBinaryType] - | lib.Scalar[lib.FixedSizeBinaryType] -) -StringScalar: 
TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType] +BinaryScalar: TypeAlias = Union[ + lib.Scalar[lib.BinaryType], + lib.Scalar[lib.LargeBinaryType], + lib.Scalar[lib.FixedSizeBinaryType], +] +StringScalar: TypeAlias = Union[lib.Scalar[lib.StringType], lib.Scalar[lib.LargeStringType]] StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar _ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any] _LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT] ListScalar: TypeAlias = ( lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] ) -TemporalScalar: TypeAlias = ( - lib.Date32Scalar - | lib.Date64Scalar - | lib.Time32Scalar[Any] - | lib.Time64Scalar[Any] - | lib.TimestampScalar[Any] - | lib.DurationScalar[Any] - | lib.MonthDayNanoIntervalScalar -) +TemporalScalar: TypeAlias = Union[ + lib.Date32Scalar, + lib.Date64Scalar, + lib.Time32Scalar[Any], + lib.Time64Scalar[Any], + lib.TimestampScalar[Any], + lib.DurationScalar[Any], + lib.MonthDayNanoIntervalScalar +] NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar From eff8ada65b969798e4c359a1159c4221714e0354 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 22:26:39 +0200 Subject: [PATCH 20/32] Fix unresolved-reference --- python/pyproject.toml | 2 +- python/stubs/__lib_pxi/types.pyi | 1 - python/stubs/_fs.pyi | 2 -- python/stubs/compute.pyi | 6 +++--- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index a35a73911c2..85f44572ae8 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -114,7 +114,7 @@ unknown-argument = "ignore" unresolved-attribute = "ignore" unresolved-global = "ignore" unresolved-import = "ignore" -unresolved-reference = "ignore" +#unresolved-reference = "ignore" #unsupported-operator = "ignore" missing-argument = "ignore" #call-non-callable = "ignore" diff --git a/python/stubs/__lib_pxi/types.pyi b/python/stubs/__lib_pxi/types.pyi index d38269ef341..f1b8d540e31 100644 --- a/python/stubs/__lib_pxi/types.pyi +++ b/python/stubs/__lib_pxi/types.pyi @@ -29,7 +29,6 @@ from .io import Buffer from .scalar import ExtensionScalar _AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) class _Weakrefable: ... class _Metadata(_Weakrefable): ... 
diff --git a/python/stubs/_fs.pyi b/python/stubs/_fs.pyi index d3b194e3ded..9f6e28dcf0f 100644 --- a/python/stubs/_fs.pyi +++ b/python/stubs/_fs.pyi @@ -19,8 +19,6 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable -SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] - class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() diff --git a/python/stubs/compute.pyi b/python/stubs/compute.pyi index 4788837eeb9..373155d6e4e 100644 --- a/python/stubs/compute.pyi +++ b/python/stubs/compute.pyi @@ -1,5 +1,5 @@ # ruff: noqa: I001 -from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence # type: ignore[unresolved_import] +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence, Union # type: ignore[unresolved_import] from collections.abc import Callable # Option classes @@ -205,8 +205,8 @@ BinaryScalar: TypeAlias = Union[ ] StringScalar: TypeAlias = Union[lib.Scalar[lib.StringType], lib.Scalar[lib.LargeStringType]] StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar -_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any] -_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT] +_ListScalar: TypeAlias = Union[lib.ListViewScalar[_DataTypeT], lib.FixedSizeListScalar[_DataTypeT, Any]] +_LargeListScalar: TypeAlias = Union[lib.LargeListScalar[_DataTypeT], lib.LargeListViewScalar[_DataTypeT]] ListScalar: TypeAlias = ( lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] ) From 1b3b39bca766eb433c4287e6b2aa5b509d57213c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 22:34:38 +0200 Subject: [PATCH 21/32] Fix not-iterable --- python/pyarrow/interchange/from_dataframe.py | 4 ++-- python/pyproject.toml | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py index fcaec41e3dc..47ddbb885ff 100644 --- a/python/pyarrow/interchange/from_dataframe.py +++ b/python/pyarrow/interchange/from_dataframe.py @@ -450,7 +450,7 @@ def buffers_to_array( def validity_buffer_from_mask( validity_buff: BufferObject, validity_dtype: Dtype, - describe_null: ColumnNullType, + describe_null: Tuple[ColumnNullType, Any], length: int, offset: int = 0, allow_copy: bool = True, @@ -529,7 +529,7 @@ def validity_buffer_from_mask( def validity_buffer_nan_sentinel( data_pa_buffer: BufferObject, data_type: Dtype, - describe_null: ColumnNullType, + describe_null: Tuple[ColumnNullType, Any], length: int, offset: int = 0, allow_copy: bool = True, diff --git a/python/pyproject.toml b/python/pyproject.toml index 85f44572ae8..17076cdd6bc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -105,14 +105,14 @@ invalid-context-manager = "ignore" invalid-return-type = "ignore" invalid-type-form = "ignore" no-matching-overload = "ignore" -non-subscriptable = "ignore" -not-iterable = "ignore" +#non-subscriptable = "ignore" +#not-iterable = "ignore" possibly-unbound-attribute = "ignore" possibly-unbound-import = "ignore" too-many-positional-arguments = "ignore" -unknown-argument = "ignore" +#unknown-argument = "ignore" unresolved-attribute = "ignore" -unresolved-global = "ignore" +#unresolved-global = "ignore" unresolved-import = "ignore" #unresolved-reference = "ignore" #unsupported-operator = "ignore" From 
e2b0a7edefe763a025b85830c0858ef424b2bbdb Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 24 Jul 2025 23:31:28 +0200 Subject: [PATCH 22/32] Fix no-matching-overload --- python/benchmarks/parquet.py | 2 +- python/pyarrow/__init__.py | 2 +- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/_dataset.pyx | 4 ++-- python/pyarrow/_dataset_parquet.pyx | 2 +- python/pyarrow/_substrait.pyx | 2 +- python/pyarrow/acero.py | 2 +- python/pyarrow/lib.pyx | 2 +- python/pyarrow/pandas_compat.py | 2 +- python/pyarrow/tests/parquet/common.py | 2 +- python/pyarrow/tests/parquet/test_basic.py | 6 +++--- .../tests/parquet/test_compliant_nested_type.py | 4 ++-- python/pyarrow/tests/parquet/test_data_types.py | 6 +++--- python/pyarrow/tests/parquet/test_dataset.py | 6 +++--- python/pyarrow/tests/parquet/test_datetime.py | 6 +++--- python/pyarrow/tests/parquet/test_encryption.py | 3 +-- python/pyarrow/tests/parquet/test_metadata.py | 6 +++--- python/pyarrow/tests/parquet/test_pandas.py | 4 ++-- python/pyarrow/tests/parquet/test_parquet_file.py | 2 +- python/pyarrow/tests/parquet/test_parquet_writer.py | 2 +- python/pyarrow/tests/strategies.py | 10 +++++----- python/pyarrow/tests/test_acero.py | 2 +- python/pyarrow/tests/test_array.py | 6 +++--- python/pyarrow/tests/test_cffi.py | 4 ++-- python/pyarrow/tests/test_compute.py | 6 +++--- python/pyarrow/tests/test_convert_builtin.py | 2 +- python/pyarrow/tests/test_dataset.py | 8 ++++---- python/pyarrow/tests/test_dataset_encryption.py | 3 +-- python/pyarrow/tests/test_extension_type.py | 4 ++-- python/pyarrow/tests/test_feather.py | 2 +- python/pyarrow/tests/test_flight.py | 2 +- python/pyarrow/tests/test_io.py | 2 +- python/pyarrow/tests/test_ipc.py | 2 +- python/pyarrow/tests/test_json.py | 2 +- python/pyarrow/tests/test_pandas.py | 2 +- python/pyarrow/tests/test_scalars.py | 2 +- python/pyarrow/tests/test_schema.py | 2 +- python/pyarrow/tests/test_sparse_tensor.py | 4 ++-- python/pyarrow/tests/test_substrait.py | 2 +- python/pyarrow/tests/test_table.py | 2 +- python/pyarrow/tests/test_types.py | 2 +- python/pyarrow/tests/test_udf.py | 4 ++-- python/pyarrow/tests/util.py | 3 ++- python/pyproject.toml | 8 ++++---- python/stubs/_csv.pyi | 8 ++++---- 45 files changed, 80 insertions(+), 81 deletions(-) diff --git a/python/benchmarks/parquet.py b/python/benchmarks/parquet.py index 44b27ff0f46..7dbd6adcc38 100644 --- a/python/benchmarks/parquet.py +++ b/python/benchmarks/parquet.py @@ -21,7 +21,7 @@ try: import pyarrow.parquet as pq except ImportError: - pq = None + pass from pyarrow.tests.util import rands diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 45aa2b619f8..1170db23424 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -56,7 +56,7 @@ def parse_git(root, **kwargs): __version__ = setuptools_scm.get_version('../', parse=parse_git) except ImportError: - __version__ = None + __version__ = None # type: ignore[invalid-assignment] import pyarrow.lib as _lib # type: ignore[unresolved_import] from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, # type: ignore[unresolved_import] diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 59fd775b5ac..ad0b116fdc6 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -36,7 +36,7 @@ import inspect try: import numpy as np except ImportError: - np = None + pass import warnings diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index d279881d15f..abfd011fa21 100644 --- 
a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -42,7 +42,7 @@ from pyarrow._json cimport ReadOptions as JsonReadOptions try: import pyarrow.substrait as pa_substrait except ImportError: - pa_substrait = None + pass _DEFAULT_BATCH_SIZE = 2**17 @@ -89,7 +89,7 @@ def _get_parquet_classes(): try: import pyarrow._dataset_parquet as _dataset_pq except ImportError: - _dataset_pq = None + pass def _get_parquet_symbol(name): diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 9405b5d8c54..e17867426dc 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -59,7 +59,7 @@ try: ) parquet_encryption_enabled = True except ImportError: - parquet_encryption_enabled = False + pass cdef Expression _true = Expression._scalar(True) diff --git a/python/pyarrow/_substrait.pyx b/python/pyarrow/_substrait.pyx index d9359c8e77d..b317ba1e639 100644 --- a/python/pyarrow/_substrait.pyx +++ b/python/pyarrow/_substrait.pyx @@ -29,7 +29,7 @@ from pyarrow.includes.libarrow_substrait cimport * try: import substrait as py_substrait except ImportError: - py_substrait = None + pass else: import substrait.proto # no-cython-lint diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py index dcead124d31..e56fe0000ea 100644 --- a/python/pyarrow/acero.py +++ b/python/pyarrow/acero.py @@ -53,7 +53,7 @@ class Dataset: class InMemoryDataset: pass - ds = DatasetModuleStub + ds = DatasetModuleStub # type: ignore[invalid-assignment] def _dataset_to_decl(dataset, use_threads=True, implicit_ordering=False): diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 5dca6fd8d2e..2da25a570ae 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -24,7 +24,7 @@ import decimal as _pydecimal try: import numpy as np except ImportError: - np = None + pass import os import sys diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 8dcfb282b31..db81b40d334 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # type: ignore[unresolved_import] # noqa diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 7351a4c3e94..28e04abf1c5 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -20,7 +20,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa from pyarrow.tests import util diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 18381538211..7a0dfcde270 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -33,7 +33,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pq = None + pass try: @@ -43,12 +43,12 @@ from pyarrow.tests.pandas_examples import dataframe_with_lists from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None # type: ignore[assignment] + pass try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not parquet' diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index 8a64cd0cab7..3e6543a0538 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -24,7 +24,7 @@ from pyarrow.tests.parquet.common import (_read_table, _check_roundtrip) except ImportError: - pq = None + pass try: import pandas as pd @@ -32,7 +32,7 @@ from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 66e12d11b21..b48627d679d 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa @@ -33,7 +33,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pq = None + pass try: @@ -44,7 +44,7 @@ dataframe_with_lists) from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index a162006dc0c..1e6897f703d 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import unittest.mock as mock @@ -40,7 +40,7 @@ from pyarrow.tests.parquet.common import ( _read_table, _test_dataframe, _write_table) except ImportError: - pq = None + pass try: @@ -48,7 +48,7 @@ import pandas.testing as tm except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index 62904937eb5..7a95debca3f 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa @@ -32,7 +32,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pq = None + pass try: @@ -41,7 +41,7 @@ from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index a11a4935a1c..5815d65c8d8 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -22,8 +22,7 @@ import pyarrow.parquet as pq import pyarrow.parquet.encryption as pe except ImportError: - pq = None - pe = None + pass else: from pyarrow.tests.parquet.encryption import ( InMemoryKmsClient, verify_file_encrypted) diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index d8fafde185f..85823b5cfa2 100644 --- 
a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa @@ -35,7 +35,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _write_table except ImportError: - pq = None + pass try: @@ -44,7 +44,7 @@ from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 9b9e7c4e48e..34d7c1c750a 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pss import pytest import pyarrow as pa @@ -44,7 +44,7 @@ from pyarrow.tests.parquet.common import (_roundtrip_pandas_dataframe, alltypes_sample) except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index df5b82ad8d9..6864fe08dca 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -38,7 +38,7 @@ from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index 8f163dfc0b5..d82b6c6da8b 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -33,7 +33,7 @@ import pandas.testing as tm except ImportError: - pd = tm = None # type: ignore[assignment] + pass # Marks all of the tests in this module diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 243815c59f7..07ebaa771f1 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -24,24 +24,24 @@ try: import hypothesis.extra.numpy as npst except ImportError: - npst = None # type: ignore[assignment] + pass try: import hypothesis.extra.pytz as tzst except ImportError: - tzst = None # type: ignore[assignment] + pass try: import zoneinfo except ImportError: - zoneinfo = None # type: ignore[assignment] + pass if sys.platform == 'win32': try: import tzdata # noqa:F401 except ImportError: - zoneinfo = None + pass try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py index cb97e3849fd..8dba7471b49 100644 --- a/python/pyarrow/tests/test_acero.py +++ b/python/pyarrow/tests/test_acero.py @@ -39,7 +39,7 @@ import pyarrow.dataset as ds from pyarrow.acero import ScanNodeOptions except ImportError: - ds = None + pass pytestmark = pytest.mark.acero diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index a1377d0c839..92db9fc177a 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -30,7 +30,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa import pyarrow.tests.strategies as past @@ 
-551,8 +551,8 @@ def test_arange(): result = pa.arange(*case) result.validate(full=True) - # type: ignore[no-matching-overload] - assert result.equals(pa.array(list(range(*case)), type=pa.int64())) + assert result.equals(pa.array(list(range(*case)), type=pa.int64())) \ + # type: ignore[no-matching-overload] # Validate memory_pool keyword argument result = pa.arange(-1, 101, memory_pool=pa.default_memory_pool()) diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 2d0ff8b45f1..60f3a5621b9 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -24,7 +24,7 @@ try: from pyarrow.cffi import ffi except ImportError: - ffi = None # type: ignore[assignment] + pass import pytest @@ -32,7 +32,7 @@ import pandas as pd import pandas.testing as tm except ImportError: - pd = tm = None # type: ignore[assignment] + pass needs_cffi = pytest.mark.skipif(ffi is None, diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 4ab0e632134..3737fc89025 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -31,12 +31,12 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass try: import pandas as pd except ImportError: - pd = None # type: ignore[assignment] + pass import pyarrow as pa import pyarrow.compute as pc @@ -45,7 +45,7 @@ try: import pyarrow.substrait as pas except ImportError: - pas = None + pass exported_functions = [ func for (name, func) in sorted(pc.__dict__.items()) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 468bddf58cb..a420af18864 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass from pyarrow.pandas_compat import _pandas_api # noqa import pyarrow as pa diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index d9a4d3df207..b22f423cad8 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -32,7 +32,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa @@ -49,17 +49,17 @@ try: import pandas as pd except ImportError: - pd = None # type: ignore[assignment] + pass try: import pyarrow.dataset as ds except ImportError: - ds = None + pass try: import pyarrow.parquet as pq except ImportError: - pq = None + pass # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not dataset' diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index eb79121b1cd..11d2e1f367a 100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -29,8 +29,7 @@ import pyarrow.parquet as pq import pyarrow.dataset as ds except ImportError: - pq = None - ds = None + pass try: from pyarrow.tests.parquet.encryption import InMemoryKmsClient diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index a3847c44e4f..c1e5db238ad 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa from pyarrow.vendored.version import Version @@ -1882,7 +1882,7 @@ def test_bool8_from_numpy_conversion(): ValueError, match="Cannot convert 0-D array to bool8 array", ): - pa.Bool8Array.from_numpy(np.bool_()) + pa.Bool8Array.from_numpy(np.bool_()) # type: ignore[no-matching-overload] # must use compatible storage type with pytest.raises( diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 8235260f468..6b35822017b 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -26,7 +26,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index b33ae005331..4f4c5f20e5d 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index b1ec7674f87..43fd0e1ac0e 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass from pyarrow.util import guid from pyarrow import Codec diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 26df224ee49..ed6e7563ed2 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -28,7 +28,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa from pyarrow.tests.util import changed_environ, invoke_script diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index ab0602cd198..90ce549c6e6 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 1bd5b58025d..45352ee3614 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -38,7 +38,7 @@ VisibleDeprecationWarning as _np_VisibleDeprecationWarning ) except ImportError: - np = None # type: ignore[assignment] + pass from pyarrow.pandas_compat import get_logical_type, _pandas_api from pyarrow.tests.util import invoke_script, random_ascii, rands diff --git a/python/pyarrow/tests/test_scalars.py 
b/python/pyarrow/tests/test_scalars.py index f48761b1918..cdbe3d00aae 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa import pyarrow.compute as pc diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 6d1ff431819..dc98f03cded 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa import pyarrow.tests.util as test_util diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index e4d141e2a6f..6a398f38ac5 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -29,12 +29,12 @@ import scipy from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix except ImportError: - scipy = None # type: ignore[assignment] + pass try: import sparse # type: ignore[unresolved_import] except ImportError: - sparse = None + pass tensor_type_pairs = [ diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index fae89d3cee5..d3f5d848bce 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -29,7 +29,7 @@ try: import pyarrow.substrait as substrait except ImportError: - substrait = None + pass # Marks all of the tests in this module # Ignore these with pytest ... -m 'not substrait' diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index ead5cbaddc5..eeb6c8f8539 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pytest import pyarrow as pa import pyarrow.compute as pc diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 338c022a223..5e5f4903e29 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -27,7 +27,7 @@ try: import hypothesis.extra.pytz as tzst except ImportError: - tzst = None # type: ignore[assignment] + pass import weakref try: diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py index 891295a5519..aed2fbceaeb 100644 --- a/python/pyarrow/tests/test_udf.py +++ b/python/pyarrow/tests/test_udf.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - np = None # type: ignore[assignment] + pass import pyarrow as pa from pyarrow import compute as pc @@ -35,7 +35,7 @@ try: import pyarrow.dataset as ds except ImportError: - ds = None + pass def mock_udf_context(batch_length=10): diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index d8b250ffff0..7eefd067807 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -171,7 +171,8 @@ def get_modified_env_with_pythonpath(): existing_pythonpath = env.get('PYTHONPATH', '') module_path = os.path.abspath( - os.path.dirname(os.path.dirname(pa.__file__))) + os.path.dirname(os.path.dirname(pa.__file__))) \ + # type: ignore[no-matching-overload] if existing_pythonpath: new_pythonpath = os.pathsep.join((module_path, existing_pythonpath)) diff --git a/python/pyproject.toml b/python/pyproject.toml index 17076cdd6bc..983d2ed9174 100644 --- a/python/pyproject.toml +++ 
b/python/pyproject.toml @@ -101,10 +101,10 @@ fallback_version = '22.0.0a0' [tool.ty.rules] invalid-argument-type = "ignore" invalid-assignment = "ignore" -invalid-context-manager = "ignore" -invalid-return-type = "ignore" -invalid-type-form = "ignore" -no-matching-overload = "ignore" +#invalid-context-manager = "ignore" +#invalid-return-type = "ignore" +#invalid-type-form = "ignore" +#no-matching-overload = "ignore" #non-subscriptable = "ignore" #not-iterable = "ignore" possibly-unbound-attribute = "ignore" diff --git a/python/stubs/_csv.pyi b/python/stubs/_csv.pyi index 2f49f8c9a6c..1f724594d35 100644 --- a/python/stubs/_csv.pyi +++ b/python/stubs/_csv.pyi @@ -96,7 +96,7 @@ class ReadOptions(lib._Weakrefable): 3: [[2022-03-03,2022-03-04]] """ - use_threads: bool = field(default=True, kw_only=False) + use_threads: bool = field(default=True) block_size: int | None = None skip_rows: int = 0 skip_rows_after_names: int = 0 @@ -182,7 +182,7 @@ class ParseOptions(lib._Weakrefable): entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] """ - delimiter: str = field(default=",", kw_only=False) + delimiter: str = field(default=",") quote_char: str | Literal[False] = '"' double_quote: bool = True escape_char: str | Literal[False] = False @@ -397,7 +397,7 @@ class ConvertOptions(lib._Weakrefable): fast: [[true,true,false,false,null]] """ - check_utf8: bool = field(default=True, kw_only=False) + check_utf8: bool = field(default=True) column_types: lib.Schema | dict | None = None null_values: list[str] | None = None true_values: list[str] | None = None @@ -438,7 +438,7 @@ class WriteOptions(lib._Weakrefable): will raise an error. """ - include_header: bool = field(default=True, kw_only=False) + include_header: bool = field(default=True) batch_size: int = 1024 delimiter: str = "," quoting_style: Literal["needed", "all_valid", "none"] = "needed" From 494a581199b2f02700477e4432c5c019f55a9e16 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 00:00:51 +0200 Subject: [PATCH 23/32] Fix invalid-assignment --- .../pyarrow/tests/parquet/test_compliant_nested_type.py | 2 -- python/pyarrow/tests/parquet/test_metadata.py | 2 -- python/pyarrow/tests/parquet/test_pandas.py | 4 ++-- python/pyarrow/tests/parquet/test_parquet_file.py | 4 +--- python/pyarrow/tests/parquet/test_parquet_writer.py | 2 +- python/pyarrow/tests/test_compute.py | 4 ---- python/pyarrow/tests/test_flight.py | 9 --------- python/pyproject.toml | 2 +- 8 files changed, 5 insertions(+), 24 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index 3e6543a0538..d7388be8a1b 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -28,8 +28,6 @@ try: import pandas as pd - import pandas.testing as tm - from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: pass diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 85823b5cfa2..d180fbfb4e5 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -40,8 +40,6 @@ try: import pandas as pd - import pandas.testing as tm - from pyarrow.tests.parquet.common import alltypes_sample except ImportError: pass diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 34d7c1c750a..f9f4e74dc86 100644 --- 
a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - pss + pass import pytest import pyarrow as pa @@ -34,7 +34,7 @@ from pyarrow.tests.parquet.common import (_read_table, _test_dataframe, _write_table) except ImportError: - pq = None + pass try: diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 6864fe08dca..b09c26c7144 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -30,12 +30,10 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _write_table except ImportError: - pq = None + pass try: - import pandas as pd import pandas.testing as tm - from pyarrow.tests.parquet.common import alltypes_sample except ImportError: pass diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index d82b6c6da8b..d6f30ea16be 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -25,7 +25,7 @@ from pyarrow.tests.parquet.common import (_read_table, _test_dataframe, _range_integers) except ImportError: - pq = None + pass try: diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 3737fc89025..4e39383473c 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -42,10 +42,6 @@ import pyarrow.compute as pc from pyarrow.lib import ArrowNotImplementedError # type: ignore[unresolved_import] -try: - import pyarrow.substrait as pas -except ImportError: - pass exported_functions = [ func for (name, func) in sorted(pc.__dict__.items()) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 4f4c5f20e5d..01a7cc058b2 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -50,15 +50,6 @@ ClientMiddleware, ClientMiddlewareFactory, ) except ImportError: - class context_like(object): - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - pass - - flight = None - class MockContextManager: def __init__(self, *args, **kwargs): pass diff --git a/python/pyproject.toml b/python/pyproject.toml index 983d2ed9174..952b73c93dc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -100,7 +100,7 @@ fallback_version = '22.0.0a0' [tool.ty.rules] invalid-argument-type = "ignore" -invalid-assignment = "ignore" +#invalid-assignment = "ignore" #invalid-context-manager = "ignore" #invalid-return-type = "ignore" #invalid-type-form = "ignore" From c5251d30c70d7e992641e8dcda5e0ebdbd42a76b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 00:17:26 +0200 Subject: [PATCH 24/32] Fix too-many-positional-arguments --- python/pyarrow/tests/test_dataset.py | 3 ++- python/pyarrow/tests/test_flight.py | 8 ++++---- python/pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index b22f423cad8..8dfdbcb8c2a 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2216,7 +2216,8 @@ def test_dictionary_partitioning_outer_nulls_raises(tempdir): def test_positional_keywords_raises(tempdir): table = pa.table({'a': ['x', 'y', None], 'b': ['x', 'y', 'z']}) with pytest.raises(TypeError): - 
ds.write_dataset(table, tempdir, "basename-{i}.arrow") + ds.write_dataset(table, tempdir, "basename-{i}.arrow") \ + # type: ignore[too-many-positional-arguments] @pytest.mark.parquet diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 01a7cc058b2..f14e5215b33 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -65,16 +65,16 @@ def serve(self): pass class FlightClient(MockContextManager): - def get_flight_info(self, **kwargs): + def get_flight_info(self, *args, **kwargs): pass - def do_action(self, **kwargs): + def do_action(self, *args, **kwargs): pass - def do_get(self, **kwargs): + def do_get(self, *args, **kwargs): pass - def do_put(self, **kwargs): + def do_put(self, *args, **kwargs): pass def close(self): diff --git a/python/pyproject.toml b/python/pyproject.toml index 952b73c93dc..7a560ee5081 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -109,7 +109,7 @@ invalid-argument-type = "ignore" #not-iterable = "ignore" possibly-unbound-attribute = "ignore" possibly-unbound-import = "ignore" -too-many-positional-arguments = "ignore" +#too-many-positional-arguments = "ignore" #unknown-argument = "ignore" unresolved-attribute = "ignore" #unresolved-global = "ignore" From 5d0463e1939105b2522b07f37f457e1451c5ad6c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 00:43:40 +0200 Subject: [PATCH 25/32] Fix invalid-argument-type --- python/pyarrow/interchange/from_dataframe.py | 2 +- python/pyarrow/tests/parquet/test_data_types.py | 2 +- python/pyarrow/tests/parquet/test_pandas.py | 12 ++++++------ python/pyarrow/tests/test_pandas.py | 8 +++++--- python/pyarrow/tests/test_table.py | 2 +- python/pyproject.toml | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py index 47ddbb885ff..80ddc8fa024 100644 --- a/python/pyarrow/interchange/from_dataframe.py +++ b/python/pyarrow/interchange/from_dataframe.py @@ -346,7 +346,7 @@ def buffers_to_array( buffers: ColumnBuffers, data_type: Tuple[DtypeKind, int, str, str], length: int, - describe_null: ColumnNullType, + describe_null: Tuple[ColumnNullType, Any], offset: int = 0, allow_copy: bool = True, ) -> pa.Array: diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index b48627d679d..898071d8f85 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -390,7 +390,7 @@ def test_parquet_nested_convenience(tempdir): read = pq.read_table( path, columns=['a']) - tm.assert_frame_equal(read.to_pandas(), df[['a']]) + tm.assert_frame_equal(read.to_pandas(), df[['a']]) # type: ignore[invalid-argument-type] read = pq.read_table( path, columns=['a', 'b']) diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index f9f4e74dc86..96622c4746b 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -425,7 +425,7 @@ def test_backwards_compatible_column_metadata_handling(datadir): table = _read_table( path, columns=['a']) result = table.to_pandas() - tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) + tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) # type: ignore[invalid-argument-type] @pytest.mark.pandas @@ -485,7 +485,7 @@ def test_pandas_categorical_roundtrip(): codes = np.array([2, 0, 0, 2, 0, 
-1, 2], dtype='int32') categories = ['foo', 'bar', 'baz'] df = pd.DataFrame({'x': pd.Categorical.from_codes( - codes, categories=categories)}) + codes, categories=pd.Index(categories))}) buf = pa.BufferOutputStream() pq.write_table(pa.table(df), buf) @@ -530,15 +530,15 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir): table, str(tempdir / "case1"), partition_cols=['part'], ) result = pq.read_table(str(tempdir / "case1")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) + tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] pq.write_to_dataset(table, str(tempdir / "case2")) result = pq.read_table(str(tempdir / "case2")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) + tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] pq.write_table(table, str(tempdir / "data.parquet")) result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) + tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] @pytest.mark.pandas @@ -555,7 +555,7 @@ def test_write_to_dataset_pandas_preserve_index(tempdir): table, str(tempdir / "case1"), partition_cols=['part'], ) result = pq.read_table(str(tempdir / "case1")).to_pandas() - tm.assert_frame_equal(result, df_cat) + tm.assert_frame_equal(result, df_cat) # type: ignore[invalid-argument-type] pq.write_to_dataset(table, str(tempdir / "case2")) result = pq.read_table(str(tempdir / "case2")).to_pandas() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 45352ee3614..b1d28a61531 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -627,11 +627,11 @@ def test_table_column_subset_metadata(self): expected = df[['a']] if isinstance(df.index, pd.DatetimeIndex): df.index.freq = None - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # type: ignore[invalid-argument-type] table_subset2 = table_subset.remove_column(1) result = table_subset2.to_pandas() - tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) + tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) # type: ignore[invalid-argument-type] def test_to_pandas_column_subset_multiindex(self): # ARROW-10122 @@ -3720,7 +3720,9 @@ def test_table_from_pandas_schema_field_order_metadata(): coerce_cols_to_types["datetime"] = "datetime64[s, UTC]" expected = df[["float", "datetime"]].astype(coerce_cols_to_types) - tm.assert_frame_equal(result, expected) + # TODO: result and expected should have the same type, + # see other ignore[invalid-argument-type] involving assert_frame_equal + tm.assert_frame_equal(result, expected) # type: ignore[invalid-argument-type] # ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index eeb6c8f8539..e7726fd0023 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -418,7 +418,7 @@ def test_to_pandas_empty_table(): table = pa.table(df) result = table.schema.empty_table().to_pandas() assert result.shape == (0, 2) - tm.assert_frame_equal(result, df.iloc[:0]) + tm.assert_frame_equal(result, df.iloc[:0]) # type: ignore[invalid-argument-type] @pytest.mark.pandas diff --git a/python/pyproject.toml b/python/pyproject.toml index 7a560ee5081..9229f42087d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml 
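The Categorical change above also reads as a standalone sketch (assuming NumPy and pandas are installed): coercing the plain category list to an Index matches the signature the checker resolves for Categorical.from_codes, so the call site needs no ignore comment.

import numpy as np
import pandas as pd

codes = np.array([2, 0, 0, 2, 0, -1, 2], dtype='int32')
categories = ['foo', 'bar', 'baz']
# Passing an Index rather than a list satisfies the resolved signature;
# the -1 code still round-trips as a missing value.
cat = pd.Categorical.from_codes(codes, categories=pd.Index(categories))
df = pd.DataFrame({'x': cat})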
@@ -99,7 +99,7 @@ git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow fallback_version = '22.0.0a0' [tool.ty.rules] -invalid-argument-type = "ignore" +#invalid-argument-type = "ignore" #invalid-assignment = "ignore" #invalid-context-manager = "ignore" #invalid-return-type = "ignore" From eadaf0c317649dd077122df8233e8dc0177236bb Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 00:48:22 +0200 Subject: [PATCH 26/32] Fix missing-argument --- python/pyarrow/__init__.py | 2 +- python/pyarrow/tests/parquet/common.py | 2 +- python/pyarrow/tests/parquet/test_data_types.py | 3 ++- python/pyarrow/tests/parquet/test_pandas.py | 12 ++++++++---- python/pyarrow/tests/parquet/test_parquet_file.py | 2 +- python/pyarrow/tests/test_pandas.py | 6 ++++-- python/pyproject.toml | 2 +- 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 1170db23424..2b96edee84e 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -52,7 +52,7 @@ def parse_git(root, **kwargs): from setuptools_scm.git import parse kwargs['describe_command'] = \ "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'" - return parse(root, **kwargs) + return parse(root, **kwargs) # type: ignore[missing-argument] __version__ = setuptools_scm.get_version('../', parse=parse_git) except ImportError: diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 28e04abf1c5..8ce804262d1 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -41,7 +41,7 @@ def _write_table(table, path, **kwargs): def _read_table(*args, **kwargs): import pyarrow.parquet as pq - table = pq.read_table(*args, **kwargs) + table = pq.read_table(*args, **kwargs) # type: ignore[missing-argument] table.validate(full=True) return table diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 898071d8f85..9f8f5212382 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -390,7 +390,8 @@ def test_parquet_nested_convenience(tempdir): read = pq.read_table( path, columns=['a']) - tm.assert_frame_equal(read.to_pandas(), df[['a']]) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(read.to_pandas(), df[['a']]) \ + # type: ignore[invalid-argument-type] read = pq.read_table( path, columns=['a', 'b']) diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 96622c4746b..5f9fdc7896d 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -425,7 +425,8 @@ def test_backwards_compatible_column_metadata_handling(datadir): table = _read_table( path, columns=['a']) result = table.to_pandas() - tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, expected[['a']].reset_index( + drop=True)) # type: ignore[invalid-argument-type] @pytest.mark.pandas @@ -530,15 +531,18 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir): table, str(tempdir / "case1"), partition_cols=['part'], ) result = pq.read_table(str(tempdir / "case1")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) \ + # type: ignore[invalid-argument-type] 
pq.write_to_dataset(table, str(tempdir / "case2")) result = pq.read_table(str(tempdir / "case2")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) \ + # type: ignore[invalid-argument-type] pq.write_table(table, str(tempdir / "data.parquet")) result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) \ + # type: ignore[invalid-argument-type] @pytest.mark.pandas diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index b09c26c7144..4d4b467e9d3 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -325,7 +325,7 @@ def test_parquet_file_with_filesystem(s3_example_fs, use_uri): table = pa.table({"a": range(10)}) pq.write_table(table, s3_path, filesystem=s3_fs) - parquet_file = pq.ParquetFile(*args, **kwargs) + parquet_file = pq.ParquetFile(*args, **kwargs) # type: ignore[missing-argument] assert parquet_file.read() == table assert not parquet_file.closed parquet_file.close() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index b1d28a61531..287b761a0a8 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -627,11 +627,13 @@ def test_table_column_subset_metadata(self): expected = df[['a']] if isinstance(df.index, pd.DatetimeIndex): df.index.freq = None - tm.assert_frame_equal(result, expected) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, expected) \ + # type: ignore[invalid-argument-type] table_subset2 = table_subset.remove_column(1) result = table_subset2.to_pandas() - tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) \ + # type: ignore[invalid-argument-type] def test_to_pandas_column_subset_multiindex(self): # ARROW-10122 diff --git a/python/pyproject.toml b/python/pyproject.toml index 9229f42087d..1ee5e6930d5 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -116,5 +116,5 @@ unresolved-attribute = "ignore" unresolved-import = "ignore" #unresolved-reference = "ignore" #unsupported-operator = "ignore" -missing-argument = "ignore" +#missing-argument = "ignore" #call-non-callable = "ignore" From f7f7f5e1cf958faeacec70f92d6a80d5d1a9cc7d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 00:54:38 +0200 Subject: [PATCH 27/32] Fix unresolved-import --- python/pyarrow/tests/test_pandas.py | 8 +++----- python/pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 287b761a0a8..3c3d874395e 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -3266,7 +3266,7 @@ def test_error_sparse(self): df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])}) except AttributeError: # pandas.arrays module introduced in pandas 0.24 - from pandas import SparseArray + from pandas import SparseArray # type: ignore[unresolved-import] df = pd.DataFrame({'a': SparseArray([1, np.nan, 3])}) with pytest.raises(TypeError, match="Sparse pandas data"): pa.Table.from_pandas(df) @@ -4427,12 +4427,11 @@ def 
test_convert_to_extension_array(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method if Version(pd.__version__) < Version("1.3.0.dev"): - from pandas.core import integer + from pandas.core import integer # type: ignore[unresolved-import] monkeypatch.delattr( integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - # type: ignore[unresolved-attribute] pd.core.arrays.integer.NumericDtype, "__from_arrow__") # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() @@ -4474,12 +4473,11 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method # (remove the version added above and the actual version for recent pandas) if Version(pd.__version__) < Version("1.3.0.dev"): - from pandas.core import integer + from pandas.core import integer # type: ignore[unresolved-import] monkeypatch.delattr( integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - # type: ignore[unresolved-attribute] pd.core.arrays.integer.NumericDtype, "__from_arrow__") result = arr.to_pandas() diff --git a/python/pyproject.toml b/python/pyproject.toml index 1ee5e6930d5..b293ad834ef 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -113,7 +113,7 @@ possibly-unbound-import = "ignore" #unknown-argument = "ignore" unresolved-attribute = "ignore" #unresolved-global = "ignore" -unresolved-import = "ignore" +#unresolved-import = "ignore" #unresolved-reference = "ignore" #unsupported-operator = "ignore" #missing-argument = "ignore" From d769e72d233ff7f24cd6ed9d6abb1bdca5a1a8c3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 01:28:51 +0200 Subject: [PATCH 28/32] Fix possibly-unbound-import --- python/pyarrow/conftest.py | 10 +++--- python/pyarrow/tests/test_acero.py | 2 +- python/pyarrow/tests/test_dataset.py | 4 +-- python/pyarrow/tests/test_fs.py | 49 ++++++++++++++-------------- python/pyproject.toml | 2 +- python/stubs/parquet/core.pyi | 2 +- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index d1b1567389b..563c98bfdc8 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -186,25 +186,25 @@ pass try: - from pyarrow.fs import AzureFileSystem # noqa + from pyarrow.fs import AzureFileSystem # type: ignore[possibly-unbound-import] # noqa defaults['azure'] = True except ImportError: pass try: - from pyarrow.fs import GcsFileSystem # noqa + from pyarrow.fs import GcsFileSystem # type: ignore[possibly-unbound-import] # noqa defaults['gcs'] = True except ImportError: pass try: - from pyarrow.fs import S3FileSystem # noqa + from pyarrow.fs import S3FileSystem # type: ignore[possibly-unbound-import] # noqa defaults['s3'] = True except ImportError: pass try: - from pyarrow.fs import HadoopFileSystem # noqa + from pyarrow.fs import HadoopFileSystem # type: ignore[possibly-unbound-import] # noqa defaults['hdfs'] = True except ImportError: pass @@ -250,7 +250,7 @@ def pytest_ignore_collect(collection_path, config): if 'pyarrow/fs' in str(collection_path): try: - from pyarrow.fs import S3FileSystem # noqa + from pyarrow.fs import S3FileSystem # type: ignore[possibly-unbound-import] # noqa return False except ImportError: return True diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py index 8dba7471b49..ac58792cd50 100644 --- a/python/pyarrow/tests/test_acero.py +++ b/python/pyarrow/tests/test_acero.py @@ -37,7 +37,7 @@ try: import 
pyarrow.dataset as ds - from pyarrow.acero import ScanNodeOptions + from pyarrow.acero import ScanNodeOptions # type: ignore[possibly-unbound-import] except ImportError: pass diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 8dfdbcb8c2a..b797c49a1ba 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -3437,7 +3437,7 @@ def test_orc_scan_options(tempdir, dataset_reader): def test_orc_format_not_supported(): try: - from pyarrow.dataset import OrcFileFormat # noqa + from pyarrow.dataset import OrcFileFormat # type: ignore[possibly-unbound-import] # noqa except ImportError: # ORC is not available, test error message with pytest.raises( @@ -5138,7 +5138,7 @@ def test_write_dataset_s3_put_only(s3_server): # required while writing a dataset in s3 where we have very # limited permissions and thus we can directly write the dataset # without creating a directory. - from pyarrow.fs import S3FileSystem + from pyarrow.fs import S3FileSystem # type: ignore[possibly-unbound-import] # write dataset with s3 filesystem host, port, _, _ = s3_server['connection'] diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 61dcb76b247..7c891c7919d 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -39,6 +39,31 @@ copy_files) from pyarrow.util import find_free_port +try: + from pyarrow.fs import ( + AwsDefaultS3RetryStrategy, # type: ignore[possibly-unbound-import] + AwsStandardS3RetryStrategy, # type: ignore[possibly-unbound-import] + S3FileSystem, # type: ignore[possibly-unbound-import] + resolve_s3_region, # type: ignore[possibly-unbound-import] + S3RetryStrategy # type: ignore[possibly-unbound-import] + ) +except ImportError: + pass + +try: + from pyarrow.fs import AzureFileSystem # type: ignore[possibly-unbound-import] +except ImportError: + pass + +try: + from pyarrow.fs import GcsFileSystem # type: ignore[possibly-unbound-import] +except ImportError: + pass + +try: + from pyarrow.fs import HadoopFileSystem # type: ignore[possibly-unbound-import] +except ImportError: + pass here = os.path.dirname(os.path.abspath(__file__)) @@ -211,7 +236,6 @@ def subtree_localfs(request, tempdir, localfs): @pytest.fixture def gcsfs(request, gcs_server): request.config.pyarrow.requires('gcs') - from pyarrow.fs import GcsFileSystem host, port = gcs_server['connection'] bucket = 'pyarrow-filesystem/' @@ -241,7 +265,6 @@ def gcsfs(request, gcs_server): @pytest.fixture def s3fs(request, s3_server): request.config.pyarrow.requires('s3') - from pyarrow.fs import S3FileSystem host, port, access_key, secret_key = s3_server['connection'] bucket = 'pyarrow-filesystem/' @@ -301,7 +324,6 @@ def subtree_s3fs(request, s3fs): @pytest.fixture def azurefs(request, azure_server): request.config.pyarrow.requires('azure') - from pyarrow.fs import AzureFileSystem host, port, account_name, account_key = azure_server['connection'] azurite_authority = f"{host}:{port}" @@ -333,8 +355,6 @@ def hdfs(request, hdfs_connection): if not pa.have_libhdfs(): pytest.skip('Cannot locate libhdfs') - from pyarrow.fs import HadoopFileSystem - host, port, user = hdfs_connection fs = HadoopFileSystem(host, port=port, user=user) @@ -515,7 +535,6 @@ def skip_azure(fs, reason): @pytest.mark.s3 def test_s3fs_limited_permissions_create_bucket(s3_server): - from pyarrow.fs import S3FileSystem _configure_s3_limited_user(s3_server, _minio_limited_policy, 'test_fs_limited_user', 'limited123') host, port, _, _ = 
s3_server['connection'] @@ -1147,7 +1166,6 @@ def test_mockfs_mtime_roundtrip(mockfs): @pytest.mark.gcs def test_gcs_options(pickle_module): - from pyarrow.fs import GcsFileSystem dt = datetime.now() fs = GcsFileSystem(access_token='abc', target_service_account='service_account@apache', @@ -1185,10 +1203,6 @@ def test_gcs_options(pickle_module): @pytest.mark.s3 def test_s3_options(pickle_module): - from pyarrow.fs import (AwsDefaultS3RetryStrategy, - AwsStandardS3RetryStrategy, S3FileSystem, - S3RetryStrategy) - fs = S3FileSystem(access_key='access', secret_key='secret', session_token='token', region='us-east-2', scheme='https', endpoint_override='localhost:8999') @@ -1289,8 +1303,6 @@ def test_s3_options(pickle_module): @pytest.mark.s3 def test_s3_proxy_options(monkeypatch, pickle_module): - from pyarrow.fs import S3FileSystem - # The following two are equivalent: proxy_opts_1_dict = {'scheme': 'http', 'host': 'localhost', 'port': 8999} proxy_opts_1_str = 'http://localhost:8999' @@ -1430,8 +1442,6 @@ def test_s3_proxy_options(monkeypatch, pickle_module): @pytest.mark.s3 def test_s3fs_wrong_region(): - from pyarrow.fs import S3FileSystem - # wrong region for bucket # anonymous=True incase CI/etc has invalid credentials fs = S3FileSystem(region='eu-north-1', anonymous=True) @@ -1454,8 +1464,6 @@ def test_s3fs_wrong_region(): @pytest.mark.azure def test_azurefs_options(pickle_module): - from pyarrow.fs import AzureFileSystem - fs1 = AzureFileSystem(account_name='fake-account-name') assert isinstance(fs1, AzureFileSystem) assert pickle_module.loads(pickle_module.dumps(fs1)) == fs1 @@ -1548,7 +1556,6 @@ def test_azurefs_options(pickle_module): @pytest.mark.hdfs def test_hdfs_options(hdfs_connection, pickle_module): - from pyarrow.fs import HadoopFileSystem if not pa.have_libhdfs(): pytest.skip('Cannot locate libhdfs') @@ -1655,8 +1662,6 @@ def test_filesystem_from_path_object(path): @pytest.mark.s3 def test_filesystem_from_uri_s3(s3_server): - from pyarrow.fs import S3FileSystem - host, port, access_key, secret_key = s3_server['connection'] uri = f"s3://{access_key}:{secret_key}@mybucket/foo/bar?scheme=http&" \ @@ -1674,8 +1679,6 @@ def test_filesystem_from_uri_s3(s3_server): @pytest.mark.gcs def test_filesystem_from_uri_gcs(gcs_server): - from pyarrow.fs import GcsFileSystem - host, port = gcs_server['connection'] uri = ("gs://anonymous@" + @@ -1864,7 +1867,6 @@ def test_py_open_append_stream(): def test_s3_real_aws(): # Exercise connection code with an AWS-backed S3 bucket. # This is a minimal integration check for ARROW-9261 and similar issues. 
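A condensed sketch of the shape test_fs.py takes after the hunks above, assuming the existing pytest markers keep S3 tests deselected when the filesystem is unavailable; the test name and body here are illustrative, not part of the patch. The optional names are imported once at module level and the per-test local imports go away.

import pytest

try:
    from pyarrow.fs import S3FileSystem  # type: ignore[possibly-unbound-import]
except ImportError:
    pass

@pytest.mark.s3
def test_s3_options_sketch():
    # S3FileSystem is only referenced inside tests carrying the "s3" marker,
    # which are skipped when the import above failed, so the possibly-unbound
    # name is never reached at runtime.
    fs = S3FileSystem(access_key='access', secret_key='secret')
    assert isinstance(fs, S3FileSystem)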
- from pyarrow.fs import S3FileSystem default_region = (os.environ.get('PYARROW_TEST_S3_REGION') or 'us-east-1') fs = S3FileSystem(anonymous=True) @@ -1920,7 +1922,6 @@ def test_s3_real_aws_region_selection(): @pytest.mark.s3 def test_resolve_s3_region(): - from pyarrow.fs import resolve_s3_region assert resolve_s3_region('voltrondata-labs-datasets') == 'us-east-2' assert resolve_s3_region('mf-nwp-models') == 'eu-west-1' diff --git a/python/pyproject.toml b/python/pyproject.toml index b293ad834ef..7aaf602966e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -108,7 +108,7 @@ fallback_version = '22.0.0a0' #non-subscriptable = "ignore" #not-iterable = "ignore" possibly-unbound-attribute = "ignore" -possibly-unbound-import = "ignore" +#possibly-unbound-import = "ignore" #too-many-positional-arguments = "ignore" #unknown-argument = "ignore" unresolved-attribute = "ignore" diff --git a/python/stubs/parquet/core.pyi b/python/stubs/parquet/core.pyi index 01dce442feb..5ad47403821 100644 --- a/python/stubs/parquet/core.pyi +++ b/python/stubs/parquet/core.pyi @@ -30,7 +30,7 @@ from pyarrow._parquet import ( # type: ignore[unresolved_import] Statistics, ) from pyarrow._stubs_typing import FilterTuple, SingleOrList # type: ignore[unresolved_import] -from pyarrow.dataset import ParquetFileFragment, Partitioning +from pyarrow.dataset import ParquetFileFragment, Partitioning # type: ignore[possibly-unbound-import] from pyarrow.lib import NativeFile, RecordBatch, Schema, Table # type: ignore[unresolved_import] from typing_extensions import deprecated From 954c49daf0e593298a4093d517489c37d2fc395a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 03:10:50 +0200 Subject: [PATCH 29/32] Fix possibly-unbound-import --- .../dataset/write_dataset_encrypted.py | 15 +- python/pyarrow/pandas_compat.py | 6 +- python/pyarrow/parquet/core.py | 5 +- .../interchange/test_interchange_spec.py | 1 - python/pyarrow/tests/parquet/test_metadata.py | 4 +- python/pyarrow/tests/test_csv.py | 2 +- python/pyarrow/tests/test_cuda.py | 4 +- .../pyarrow/tests/test_cuda_numba_interop.py | 5 +- python/pyarrow/tests/test_dataset.py | 141 ++++++++++-------- .../pyarrow/tests/test_dataset_encryption.py | 35 +++-- python/pyarrow/tests/test_feather.py | 2 +- python/pyarrow/tests/test_jvm.py | 2 +- python/pyarrow/tests/wsgi_examples.py | 2 +- python/pyproject.toml | 2 +- python/setup.py | 12 +- 15 files changed, 133 insertions(+), 105 deletions(-) diff --git a/python/examples/dataset/write_dataset_encrypted.py b/python/examples/dataset/write_dataset_encrypted.py index 910559939e6..5d8160b02c9 100644 --- a/python/examples/dataset/write_dataset_encrypted.py +++ b/python/examples/dataset/write_dataset_encrypted.py @@ -67,16 +67,19 @@ def kms_factory(kms_connection_configuration): crypto_factory = pe.CryptoFactory(kms_factory) -parquet_encryption_cfg = ds.ParquetEncryptionConfig( +parquet_encryption_cfg = ds.ParquetEncryptionConfig( \ + # type: ignore[possibly-unbound-attribute] crypto_factory, kms_connection_config, encryption_config) -parquet_decryption_cfg = ds.ParquetDecryptionConfig(crypto_factory, - kms_connection_config, - decryption_config) +parquet_decryption_cfg = ds.ParquetDecryptionConfig( \ + # type: ignore[possibly-unbound-attribute] + crypto_factory, kms_connection_config, decryption_config) # set encryption config for parquet fragment scan options -pq_scan_opts = ds.ParquetFragmentScanOptions() +pq_scan_opts = ds.ParquetFragmentScanOptions() \ + # type: ignore[possibly-unbound-attribute] 
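These possibly-unbound-attribute ignores on the ds. call sites can also be avoided wholesale; a sketch in the spirit of the direct imports the tests adopt later in this series, assuming a build with the Parquet dataset extension (the error message below is illustrative).

try:
    from pyarrow.dataset import ParquetFileFormat, ParquetFragmentScanOptions
except ImportError as exc:
    raise SystemExit("pyarrow was built without Parquet dataset support") from exc

# Module-level bindings the checker can resolve, with no per-line ignores.
pq_scan_opts = ParquetFragmentScanOptions()
pformat = ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)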
pq_scan_opts.parquet_decryption_config = parquet_decryption_cfg -pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) +pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) \ + # type: ignore[possibly-unbound-attribute] if os.path.exists('sample_dataset'): shutil.rmtree('sample_dataset') diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index db81b40d334..131025e60c0 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -755,8 +755,10 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= # create ExtensionBlock arr = item['py_array'] assert len(placement) == 1 - name = columns.get(placement[0], None) - pandas_dtype = extension_columns.get(name, None) + name = columns.get(placement[0], None) \ + # type: ignore[possibly-unbound-attribute] + pandas_dtype = extension_columns.get(name, None) \ + # type: ignore[possibly-unbound-attribute] if not hasattr(pandas_dtype, '__from_arrow__'): raise ValueError("This column does not support to be converted " "to a pandas ExtensionArray") diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 7b6c57f9683..d38b3fbff92 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -1404,7 +1404,8 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, else: single_file = path_or_paths - parquet_format = ds.ParquetFileFormat(**read_options) + parquet_format = ds.ParquetFileFormat(**read_options) \ + # type: ignore[possibly-unbound-attribute] if single_file is not None: fragment = parquet_format.make_fragment(single_file, filesystem) @@ -2200,7 +2201,7 @@ def file_visitor(written_file): metadata_collector.append(written_file.metadata) # map format arguments - parquet_format = ds.ParquetFileFormat() + parquet_format = ds.ParquetFileFormat() # type: ignore[possibly-unbound-attribute] write_options = parquet_format.make_write_options(**kwargs) # map old filesystems to new one diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py index 68afc0c633b..14e2aab4bfb 100644 --- a/python/pyarrow/tests/interchange/test_interchange_spec.py +++ b/python/pyarrow/tests/interchange/test_interchange_spec.py @@ -22,7 +22,6 @@ import pyarrow.tests.strategies as past import pytest -np = None try: import numpy as np except ImportError: diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index d180fbfb4e5..3386f77bb1a 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -494,12 +494,12 @@ def test_multi_dataset_metadata(tempdir): # Write merged metadata-only file with open(metapath, "wb") as f: - _meta.write_metadata_file(f) + _meta.write_metadata_file(f) # type: ignore[possibly-unbound-attribute] # Read back the metadata meta = pq.read_metadata(metapath) md = meta.to_dict() - _md = _meta.to_dict() + _md = _meta.to_dict() # type: ignore[possibly-unbound-attribute] for key in _md: if key != 'serialized_size': assert _md[key] == md[key] diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 2794d07e87c..170f62a43bd 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1502,7 +1502,7 @@ def signal_from_thread(): # Interruption should have arrived timely assert last_duration <= 2.0 - e = exc_info.__context__ 
+ e = exc_info.__context__ # type: ignore[possibly-unbound-attribute] assert isinstance(e, pa.ArrowCancelled) assert e.signum == signal.SIGINT diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index e06f479987c..d8298eec773 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -42,8 +42,8 @@ not has_ipc_support, reason='CUDA IPC not supported in platform `%s`' % (platform)) -global_context = None # for flake8 -global_context1 = None # for flake8 +global_context = cuda.Context(0) # for flake8 +global_context1 = cuda.Context(0) # for flake8 def setup_module(module): diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py index f211f0046f0..cfcf6673755 100644 --- a/python/pyarrow/tests/test_cuda_numba_interop.py +++ b/python/pyarrow/tests/test_cuda_numba_interop.py @@ -30,7 +30,7 @@ # type: ignore[unresolved_import] # noqa: E402 -context_choices = None +context_choices = {} context_choice_ids = ['pyarrow.cuda', 'numba.cuda'] @@ -73,7 +73,8 @@ def make_random_buffer(size, target='host', dtype='uint8', ctx=None): return arr, buf elif target == 'device': arr, buf = make_random_buffer(size, target='host', dtype=dtype) - dbuf = ctx.new_buffer(size * dtype.itemsize) + dbuf = ctx.new_buffer(size * dtype.itemsize) \ + # type: ignore[possibly-unbound-attribute] dbuf.copy_from_host(buf, position=0, nbytes=buf.size) return arr, dbuf raise ValueError('invalid target value') diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index b797c49a1ba..6303b47bd44 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -53,11 +53,24 @@ try: import pyarrow.dataset as ds + from pyarrow.dataset import ( + ParquetFragmentScanOptions, ParquetReadOptions, ParquetFileFragment \ + # type: ignore[possibly-unbound-attribute] + ) +except ImportError: + pass + +try: + from pyarrow.dataset import ( + OrcFileFormat # type: ignore[possibly-unbound-attribute] + ) except ImportError: pass try: import pyarrow.parquet as pq + from pyarrow.parquet import ParquetFileFormat \ + # type: ignore[possibly-unbound-attribute] except ImportError: pass @@ -270,7 +283,7 @@ def multisourcefs(request): @pytest.fixture def dataset(mockfs): - format = ds.ParquetFileFormat() + format = ParquetFileFormat() selector = fs.FileSelector('subdir', recursive=True) options = ds.FileSystemFactoryOptions('subdir') options.partitioning = ds.DirectoryPartitioning( @@ -338,7 +351,7 @@ def test_filesystem_dataset(mockfs): schema = pa.schema([ pa.field('const', pa.int64()) ]) - file_format = ds.ParquetFileFormat() + file_format = ParquetFileFormat() paths = ['subdir/1/xxx/file0.parquet', 'subdir/2/yyy/file1.parquet'] partitions = [ds.field('part') == x for x in range(1, 3)] fragments = [file_format.make_fragment(path, mockfs, part) @@ -356,7 +369,7 @@ def test_filesystem_dataset(mockfs): for dataset in [dataset_from_fragments, dataset_from_paths]: assert isinstance(dataset, ds.FileSystemDataset) - assert isinstance(dataset.format, ds.ParquetFileFormat) + assert isinstance(dataset.format, ParquetFileFormat) assert dataset.partition_expression.equals(root_partition) assert set(dataset.files) == set(paths) @@ -364,14 +377,14 @@ def test_filesystem_dataset(mockfs): for fragment, partition, path in zip(fragments, partitions, paths): assert fragment.partition_expression.equals(partition) assert fragment.path == path - assert isinstance(fragment.format, 
ds.ParquetFileFormat) - assert isinstance(fragment, ds.ParquetFileFragment) + assert isinstance(fragment.format, ParquetFileFormat) + assert isinstance(fragment, ParquetFileFragment) assert fragment.row_groups == [0] assert fragment.num_row_groups == 1 row_group_fragments = list(fragment.split_by_row_group()) assert fragment.num_row_groups == len(row_group_fragments) == 1 - assert isinstance(row_group_fragments[0], ds.ParquetFileFragment) + assert isinstance(row_group_fragments[0], ParquetFileFragment) assert row_group_fragments[0].path == path assert row_group_fragments[0].row_groups == [0] assert row_group_fragments[0].num_row_groups == 1 @@ -490,7 +503,7 @@ def test_dataset(dataset, dataset_reader): def test_dataset_factory_inspect_schema_promotion(promotable_mockfs): mockfs, path1, path2 = promotable_mockfs factory = ds.FileSystemDatasetFactory( - mockfs, [path1, path2], ds.ParquetFileFormat() + mockfs, [path1, path2], ParquetFileFormat() ) with pytest.raises( @@ -534,7 +547,7 @@ def test_dataset_factory_inspect_schema_promotion(promotable_mockfs): def test_dataset_factory_inspect_bad_params(promotable_mockfs): mockfs, path1, path2 = promotable_mockfs factory = ds.FileSystemDatasetFactory( - mockfs, [path1, path2], ds.ParquetFileFormat() + mockfs, [path1, path2], ParquetFileFormat() ) with pytest.raises(ValueError, match='Invalid promote_options: bad_option'): @@ -942,11 +955,11 @@ def test_partition_keys(): @pytest.mark.parquet def test_parquet_read_options(): - opts1 = ds.ParquetReadOptions() - opts2 = ds.ParquetReadOptions(dictionary_columns=['a', 'b']) - opts3 = ds.ParquetReadOptions(coerce_int96_timestamp_unit="ms") - opts4 = ds.ParquetReadOptions(binary_type=pa.binary_view()) - opts5 = ds.ParquetReadOptions(list_type=pa.LargeListType) + opts1 = ParquetReadOptions() + opts2 = ParquetReadOptions(dictionary_columns=['a', 'b']) + opts3 = ParquetReadOptions(coerce_int96_timestamp_unit="ms") + opts4 = ParquetReadOptions(binary_type=pa.binary_view()) + opts5 = ParquetReadOptions(list_type=pa.LargeListType) assert opts1.dictionary_columns == set() @@ -984,37 +997,37 @@ def test_parquet_read_options(): @pytest.mark.parquet def test_parquet_file_format_read_options(): - pff1 = ds.ParquetFileFormat() - pff2 = ds.ParquetFileFormat(dictionary_columns={'a'}) - pff3 = ds.ParquetFileFormat(coerce_int96_timestamp_unit="s") - pff4 = ds.ParquetFileFormat(binary_type=pa.binary_view()) - pff5 = ds.ParquetFileFormat(list_type=pa.LargeListType) - - assert pff1.read_options == ds.ParquetReadOptions() - assert pff2.read_options == ds.ParquetReadOptions(dictionary_columns=['a']) - assert pff3.read_options == ds.ParquetReadOptions( + pff1 = ParquetFileFormat() + pff2 = ParquetFileFormat(dictionary_columns={'a'}) + pff3 = ParquetFileFormat(coerce_int96_timestamp_unit="s") + pff4 = ParquetFileFormat(binary_type=pa.binary_view()) + pff5 = ParquetFileFormat(list_type=pa.LargeListType) + + assert pff1.read_options == ParquetReadOptions() + assert pff2.read_options == ParquetReadOptions(dictionary_columns=['a']) + assert pff3.read_options == ParquetReadOptions( coerce_int96_timestamp_unit="s") - assert pff4.read_options == ds.ParquetReadOptions( + assert pff4.read_options == ParquetReadOptions( binary_type=pa.binary_view()) - assert pff5.read_options == ds.ParquetReadOptions( + assert pff5.read_options == ParquetReadOptions( list_type=pa.LargeListType) @pytest.mark.parquet def test_parquet_scan_options(): - opts1 = ds.ParquetFragmentScanOptions() - opts2 = ds.ParquetFragmentScanOptions(buffer_size=4096) - opts3 
= ds.ParquetFragmentScanOptions( + opts1 = ParquetFragmentScanOptions() + opts2 = ParquetFragmentScanOptions(buffer_size=4096) + opts3 = ParquetFragmentScanOptions( buffer_size=2**13, use_buffered_stream=True) - opts4 = ds.ParquetFragmentScanOptions(buffer_size=2**13, pre_buffer=False) - opts5 = ds.ParquetFragmentScanOptions( + opts4 = ParquetFragmentScanOptions(buffer_size=2**13, pre_buffer=False) + opts5 = ParquetFragmentScanOptions( thrift_string_size_limit=123456, thrift_container_size_limit=987654,) - opts6 = ds.ParquetFragmentScanOptions( + opts6 = ParquetFragmentScanOptions( page_checksum_verification=True) cache_opts = pa.CacheOptions( hole_size_limit=2**10, range_size_limit=8*2**10, lazy=True) - opts7 = ds.ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts) + opts7 = ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts) assert opts1.use_buffered_stream is False assert opts1.buffer_size == 2**13 @@ -1076,16 +1089,16 @@ def test_file_format_pickling(pickle_module): use_threads=False, block_size=14)), ] try: - formats.append(ds.OrcFileFormat()) + formats.append(OrcFileFormat()) except ImportError: pass if pq is not None: formats.extend([ - ds.ParquetFileFormat(), - ds.ParquetFileFormat(dictionary_columns={'a'}), - ds.ParquetFileFormat(use_buffered_stream=True), - ds.ParquetFileFormat( + ParquetFileFormat(), + ParquetFileFormat(dictionary_columns={'a'}), + ParquetFileFormat(use_buffered_stream=True), + ParquetFileFormat( use_buffered_stream=True, buffer_size=4096, thrift_string_size_limit=123, @@ -1114,8 +1127,8 @@ def test_fragment_scan_options_pickling(pickle_module): if pq is not None: options.extend([ - ds.ParquetFragmentScanOptions(buffer_size=4096), - ds.ParquetFragmentScanOptions(pre_buffer=True), + ParquetFragmentScanOptions(buffer_size=4096), + ParquetFragmentScanOptions(pre_buffer=True), ]) for option in options: @@ -1132,8 +1145,8 @@ def test_fragment_scan_options_pickling(pickle_module): @pytest.mark.parametrize('pre_buffer', [False, True]) @pytest.mark.parquet def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer): - format = ds.ParquetFileFormat( - read_options=ds.ParquetReadOptions(dictionary_columns={"str"}), + format = ParquetFileFormat( + read_options=ParquetReadOptions(dictionary_columns={"str"}), pre_buffer=pre_buffer ) @@ -1205,7 +1218,7 @@ def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer): @pytest.mark.parquet def test_make_fragment(multisourcefs): - parquet_format = ds.ParquetFileFormat() + parquet_format = ParquetFileFormat() dataset = ds.dataset('/plain', filesystem=multisourcefs, format=parquet_format) @@ -1216,7 +1229,7 @@ def test_make_fragment(multisourcefs): row_group_fragment = parquet_format.make_fragment(path, multisourcefs, row_groups=[0]) for f in [fragment, row_group_fragment]: - assert isinstance(f, ds.ParquetFileFragment) + assert isinstance(f, ParquetFileFragment) assert f.path == path assert isinstance(f.filesystem, type(multisourcefs)) assert row_group_fragment.row_groups == [0] @@ -1232,7 +1245,7 @@ def test_make_fragment_with_size(s3_example_simple): """ table, path, fs, uri, host, port, access_key, secret_key = s3_example_simple - file_format = ds.ParquetFileFormat() + file_format = ParquetFileFormat() paths = [path] fragments = [file_format.make_fragment(path, fs) @@ -1339,8 +1352,8 @@ def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module): arrays[1], arrays[2].dictionary_encode() ] - dictionary_format = ds.ParquetFileFormat( - 
read_options=ds.ParquetReadOptions( + dictionary_format = ParquetFileFormat( + read_options=ParquetReadOptions( dictionary_columns=['alpha', 'animal'] ), use_buffered_stream=True, @@ -1348,7 +1361,7 @@ def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module): ) cases = [ - (arrays, ds.ParquetFileFormat()), + (arrays, ParquetFileFormat()), (dictionary_arrays, dictionary_format) ] for arrays, format_ in cases: @@ -1952,7 +1965,7 @@ def test_fragments_repr(tempdir, dataset): "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))]) def test_partitioning_factory(mockfs, pickled, pickle_module): paths_or_selector = fs.FileSelector('subdir', recursive=True) - format = ds.ParquetFileFormat() + format = ParquetFileFormat() options = ds.FileSystemFactoryOptions('subdir') partitioning_factory = ds.DirectoryPartitioning.discover(['group', 'key']) @@ -1987,7 +2000,7 @@ def test_partitioning_factory(mockfs, pickled, pickle_module): def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled, pickle_module): paths_or_selector = fs.FileSelector('subdir', recursive=True) - format = ds.ParquetFileFormat() + format = ParquetFileFormat() options = ds.FileSystemFactoryOptions('subdir') partitioning_factory = ds.DirectoryPartitioning.discover( @@ -2595,12 +2608,12 @@ def test_construct_from_invalid_sources_raise(multisourcefs): child1 = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/plain'), - format=ds.ParquetFileFormat() + format=ParquetFileFormat() ) child2 = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/schema'), - format=ds.ParquetFileFormat() + format=ParquetFileFormat() ) batch1 = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"]) batch2 = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["b"]) @@ -3072,7 +3085,7 @@ def test_file_format_inspect_fsspec(tempdir): assert fsspec_fs.ls(tempdir)[0].endswith("data.parquet") # inspect using dataset file format - format = ds.ParquetFileFormat() + format = ParquetFileFormat() # manually creating a PyFileSystem instead of using fs._ensure_filesystem # which would convert an fsspec local filesystem to a native one filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs)) @@ -3159,7 +3172,7 @@ def test_filter_compute_expression(tempdir, dataset_reader): def test_dataset_union(multisourcefs): child = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/plain'), - format=ds.ParquetFileFormat() + format=ParquetFileFormat() ) factory = ds.UnionDatasetFactory([child]) @@ -3382,7 +3395,7 @@ def test_orc_format(tempdir, dataset_reader): path = str(tempdir / 'test.orc') orc.write_table(table, path) - dataset = ds.dataset(path, format=ds.OrcFileFormat()) + dataset = ds.dataset(path, format=OrcFileFormat()) fragments = list(dataset.get_fragments()) assert isinstance(fragments[0], ds.FileFragment) result = dataset_reader.to_table(dataset) @@ -3456,7 +3469,7 @@ def test_orc_writer_not_implemented_for_dataset(): pa.table({"a": range(10)}), format='orc', base_dir='/tmp' ) - of = ds.OrcFileFormat() + of = OrcFileFormat() with pytest.raises( NotImplementedError, match="Writing datasets not yet implemented for this file format" @@ -4922,7 +4935,7 @@ def test_write_dataset_parquet(tempdir): # using custom options for version in ["1.0", "2.4", "2.6"]: - format = ds.ParquetFileFormat() + format = ParquetFileFormat() opts = format.make_write_options(version=version) assert " should error is dataset was properly encrypted - pformat = pa.dataset.ParquetFileFormat() + pformat = 
ParquetFileFormat() with pytest.raises(IOError, match=r"no decryption"): ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) # set decryption config for parquet fragment scan options - pq_scan_opts = ds.ParquetFragmentScanOptions( + pq_scan_opts = ParquetFragmentScanOptions( decryption_config=parquet_decryption_cfg ) - pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + pformat = ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) assert table.equals(dataset.to_table()) @@ -144,11 +153,11 @@ def test_dataset_encryption_decryption(): # set decryption properties for parquet fragment scan options decryption_properties = crypto_factory.file_decryption_properties( kms_connection_config, decryption_config) - pq_scan_opts = ds.ParquetFragmentScanOptions( + pq_scan_opts = ParquetFragmentScanOptions( decryption_properties=decryption_properties ) - pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + pformat = ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) assert table.equals(dataset.to_table()) @@ -163,7 +172,7 @@ def test_write_dataset_parquet_without_encryption(): # Set the encryption configuration using ParquetFileFormat # and make_write_options - pformat = pa.dataset.ParquetFileFormat() + pformat = ParquetFileFormat() with pytest.raises(NotImplementedError): _ = pformat.make_write_options(encryption_config="some value") @@ -201,14 +210,14 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes: plaintext_footer=False, data_key_length_bits=128, ) - pqe_config = ds.ParquetEncryptionConfig( + pqe_config = ParquetEncryptionConfig( crypto_factory, kms_config, encryption_config ) - pqd_config = ds.ParquetDecryptionConfig( + pqd_config = ParquetDecryptionConfig( crypto_factory, kms_config, pe.DecryptionConfiguration() ) - scan_options = ds.ParquetFragmentScanOptions(decryption_config=pqd_config) - file_format = ds.ParquetFileFormat(default_fragment_scan_options=scan_options) + scan_options = ParquetFragmentScanOptions(decryption_config=pqd_config) + file_format = ParquetFileFormat(default_fragment_scan_options=scan_options) write_options = file_format.make_write_options(encryption_config=pqe_config) file_decryption_properties = crypto_factory.file_decryption_properties(kms_config) diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 6b35822017b..c4631903c1a 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -63,7 +63,7 @@ def compression(request): yield request.param -TEST_FILES = None +TEST_FILES = [] def setup_module(module): diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index 876c05d740a..51f259e4bd5 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -43,7 +43,7 @@ def root_allocator(): 'POM:version', namespaces={ 'POM': 'http://maven.apache.org/POM/4.0.0' - }).text + }).text # type: ignore[possibly-unbound-attribute] jar_path = os.path.join( arrow_dir, 'java', 'tools', 'target', f'arrow-tools-{version}-jar-with-dependencies.jar') diff --git a/python/pyarrow/tests/wsgi_examples.py b/python/pyarrow/tests/wsgi_examples.py index 440b107abe5..1fafa852dc6 100644 --- a/python/pyarrow/tests/wsgi_examples.py +++ b/python/pyarrow/tests/wsgi_examples.py @@ -28,7 +28,7 @@ def application(env, 
start_response): # See test_fs::test_uwsgi_integration start_response('200 OK', [('Content-Type', 'text/html')]) # flake8: noqa - fs = pyarrow.fs.S3FileSystem() + fs = pyarrow.fs.S3FileSystem() # type: ignore[possibly-unbound-attribute] return [b"Hello World\n"] else: start_response('404 Not Found', [('Content-Type', 'text/html')]) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7aaf602966e..8e7f14d3c46 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -107,7 +107,7 @@ fallback_version = '22.0.0a0' #no-matching-overload = "ignore" #non-subscriptable = "ignore" #not-iterable = "ignore" -possibly-unbound-attribute = "ignore" +#possibly-unbound-attribute = "ignore" #possibly-unbound-import = "ignore" #too-many-positional-arguments = "ignore" #unknown-argument = "ignore" diff --git a/python/setup.py b/python/setup.py index d037b82f4ad..f74824d5e6e 100755 --- a/python/setup.py +++ b/python/setup.py @@ -43,9 +43,9 @@ # We can't use sys.platform in a cross-compiling situation # as here it may be set to the host not target platform is_emscripten = ( - sysconfig.get_config_var("SOABI") - # type: ignore[possibly-unbound] - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 + sysconfig.get_config_var("SOABI") and + sysconfig.get_config_var("SOABI").find("emscripten") != -1 \ + # type: ignore[possibly-unbound-attribute] ) @@ -254,9 +254,9 @@ def _run_cmake(self): # Detect if we built elsewhere if os.path.isfile('CMakeCache.txt'): cachefile = open('CMakeCache.txt', 'r') - cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - # type: ignore[possibly-unbound-attribute] - cachefile.read()).group(1) + cachedir = re.search( # type: ignore[possibly-unbound-attribute] + 'CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', + cachefile.read()).group(1) cachefile.close() if (cachedir != build_temp): build_base = pjoin(saved_cwd, build_cmd.build_base) From 291dd88b1ca2a6cb6453e4ebb0e8fd8b22d43e58 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Jul 2025 14:19:22 +0200 Subject: [PATCH 30/32] Fix unresolved-attribute --- python/examples/flight/client.py | 2 +- python/pyarrow/interchange/from_dataframe.py | 11 +- python/pyarrow/pandas_compat.py | 78 +- python/pyarrow/tests/parquet/test_pandas.py | 3 +- python/pyarrow/tests/test_acero.py | 21 +- python/pyarrow/tests/test_array.py | 24 +- python/pyarrow/tests/test_cffi.py | 3 +- python/pyarrow/tests/test_compute.py | 1093 +++++++++--------- python/pyarrow/tests/test_csv.py | 3 +- python/pyarrow/tests/test_cuda.py | 5 +- python/pyarrow/tests/test_dataset.py | 30 +- python/pyarrow/tests/test_exec_plan.py | 9 +- python/pyarrow/tests/test_gdb.py | 8 +- python/pyarrow/tests/test_io.py | 13 +- python/pyarrow/tests/test_ipc.py | 10 +- python/pyarrow/tests/test_pandas.py | 18 +- python/pyarrow/tests/test_scalars.py | 2 +- python/pyarrow/tests/test_schema.py | 4 +- python/pyarrow/tests/test_sparse_tensor.py | 11 +- python/pyarrow/tests/test_strategies.py | 16 +- python/pyarrow/tests/test_substrait.py | 46 +- python/pyarrow/tests/test_table.py | 19 +- python/pyarrow/tests/test_types.py | 52 +- python/pyproject.toml | 2 +- python/scripts/test_leak.py | 2 +- 25 files changed, 777 insertions(+), 708 deletions(-) diff --git a/python/examples/flight/client.py b/python/examples/flight/client.py index 75976674bf2..8abce1ae8c8 100644 --- a/python/examples/flight/client.py +++ b/python/examples/flight/client.py @@ -70,7 +70,7 @@ def do_action(args, client, connection_args={}): print('Running action', args.action_type) for result in 
client.do_action(action): print("Got result", result.body.to_pybytes()) - except pyarrow.lib.ArrowIOError as e: + except pyarrow.lib.ArrowIOError as e: # type: ignore[unresolved-attribute] print("Error calling action:", e) diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py index 80ddc8fa024..106c582c22b 100644 --- a/python/pyarrow/interchange/from_dataframe.py +++ b/python/pyarrow/interchange/from_dataframe.py @@ -32,6 +32,7 @@ import re import pyarrow.compute as pc +from pyarrow.compute import equal, invert, is_nan # type: ignore[unresolved-attribute] from pyarrow.interchange.column import Dtype @@ -513,7 +514,7 @@ def validity_buffer_from_mask( offset=offset) if sentinel_val == 1: - mask_bool = pc.invert(mask_bool) + mask_bool = invert(mask_bool) return mask_bool.buffers()[1] @@ -583,8 +584,8 @@ def validity_buffer_nan_sentinel( [None, data_pa_buffer], offset=offset, ) - mask = pc.is_nan(pyarrow_data) - mask = pc.invert(mask) + mask = is_nan(pyarrow_data) + mask = invert(mask) return mask.buffers()[1] # Check for sentinel values @@ -603,8 +604,8 @@ def validity_buffer_nan_sentinel( length, [None, data_pa_buffer], offset=offset) - sentinel_arr = pc.equal(pyarrow_data, sentinel_val) - mask_bool = pc.invert(sentinel_arr) + sentinel_arr = equal(pyarrow_data, sentinel_val) + mask_bool = invert(sentinel_arr) return mask_bool.buffers()[1] elif null_kind == ColumnNullType.NON_NULLABLE: diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 131025e60c0..970126da64c 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -34,6 +34,12 @@ import numpy as np except ImportError: pass + +try: + from pyarrow import lib # type: ignore[unresolved-attribute] +except ImportError: + pass + import pyarrow as pa from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # type: ignore[unresolved_import] # noqa @@ -48,26 +54,26 @@ def get_logical_type_map(): if not _logical_type_map: _logical_type_map.update({ - pa.lib.Type_NA: 'empty', - pa.lib.Type_BOOL: 'bool', - pa.lib.Type_INT8: 'int8', - pa.lib.Type_INT16: 'int16', - pa.lib.Type_INT32: 'int32', - pa.lib.Type_INT64: 'int64', - pa.lib.Type_UINT8: 'uint8', - pa.lib.Type_UINT16: 'uint16', - pa.lib.Type_UINT32: 'uint32', - pa.lib.Type_UINT64: 'uint64', - pa.lib.Type_HALF_FLOAT: 'float16', - pa.lib.Type_FLOAT: 'float32', - pa.lib.Type_DOUBLE: 'float64', - pa.lib.Type_DATE32: 'date', - pa.lib.Type_DATE64: 'date', - pa.lib.Type_TIME32: 'time', - pa.lib.Type_TIME64: 'time', - pa.lib.Type_BINARY: 'bytes', - pa.lib.Type_FIXED_SIZE_BINARY: 'bytes', - pa.lib.Type_STRING: 'unicode', + lib.Type_NA: 'empty', + lib.Type_BOOL: 'bool', + lib.Type_INT8: 'int8', + lib.Type_INT16: 'int16', + lib.Type_INT32: 'int32', + lib.Type_INT64: 'int64', + lib.Type_UINT8: 'uint8', + lib.Type_UINT16: 'uint16', + lib.Type_UINT32: 'uint32', + lib.Type_UINT64: 'uint64', + lib.Type_HALF_FLOAT: 'float16', + lib.Type_FLOAT: 'float32', + lib.Type_DOUBLE: 'float64', + lib.Type_DATE32: 'date', + lib.Type_DATE64: 'date', + lib.Type_TIME32: 'time', + lib.Type_TIME64: 'time', + lib.Type_BINARY: 'bytes', + lib.Type_FIXED_SIZE_BINARY: 'bytes', + lib.Type_STRING: 'unicode', }) return _logical_type_map @@ -78,11 +84,11 @@ def get_logical_type(arrow_type): try: return logical_type_map[arrow_type.id] except KeyError: - if isinstance(arrow_type, pa.lib.DictionaryType): + if isinstance(arrow_type, lib.DictionaryType): return 'categorical' - elif isinstance(arrow_type, pa.lib.ListType): + 
elif isinstance(arrow_type, lib.ListType): return f'list[{get_logical_type(arrow_type.value_type)}]' - elif isinstance(arrow_type, pa.lib.TimestampType): + elif isinstance(arrow_type, lib.TimestampType): return 'datetimetz' if arrow_type.tz is not None else 'datetime' elif pa.types.is_decimal(arrow_type): return 'decimal' @@ -139,7 +145,7 @@ def get_extension_dtype_info(column): } physical_dtype = str(cats.codes.dtype) elif hasattr(dtype, 'tz'): - metadata = {'timezone': pa.lib.tzinfo_to_string(dtype.tz)} + metadata = {'timezone': lib.tzinfo_to_string(dtype.tz)} physical_dtype = 'datetime64[ns]' else: metadata = None @@ -569,7 +575,7 @@ def dataframe_to_types(df, preserve_index, columns=None): type_ = pa.array(empty, from_pandas=True).type else: values, type_ = get_datetimetz_type(values, c.dtype, None) - type_ = pa.lib._ndarray_to_arrow_type(values, type_) + type_ = lib._ndarray_to_arrow_type(values, type_) if type_ is None: type_ = pa.array(c, from_pandas=True).type types.append(type_) @@ -755,10 +761,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= # create ExtensionBlock arr = item['py_array'] assert len(placement) == 1 - name = columns.get(placement[0], None) \ - # type: ignore[possibly-unbound-attribute] - pandas_dtype = extension_columns.get(name, None) \ - # type: ignore[possibly-unbound-attribute] + name = columns[placement[0]] # type: ignore[non-subscriptable] + pandas_dtype = extension_columns[name] # type: ignore[non-subscriptable] if not hasattr(pandas_dtype, '__from_arrow__'): raise ValueError("This column does not support to be converted " "to a pandas ExtensionArray") @@ -775,7 +779,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= def make_datetimetz(unit, tz): if _pandas_api.is_v1(): unit = 'ns' # ARROW-3789: Coerce date/timestamp types to datetime64[ns] - tz = pa.lib.string_to_tzinfo(tz) + tz = lib.string_to_tzinfo(tz) return _pandas_api.datetimetz_type(unit, tz=tz) @@ -805,8 +809,8 @@ def table_to_dataframe( columns = _deserialize_column_index(table, all_columns, column_indexes) column_names = table.column_names - result = pa.lib.table_to_blocks(options, table, categories, - list(ext_columns_dtypes.keys())) + result = lib.table_to_blocks(options, table, categories, + list(ext_columns_dtypes.keys())) if _pandas_api.is_ge_v3(): from pandas.api.internals import create_dataframe_from_blocks \ # type: ignore[unresolved_import] @@ -830,8 +834,8 @@ def table_to_dataframe( axes = [columns, index] mgr = BlockManager(blocks, axes) if _pandas_api.is_ge_v21(): - # type: ignore[unresolved-attribute] - df = DataFrame._from_mgr(mgr, mgr.axes) + df = DataFrame._from_mgr(mgr, mgr.axes) \ + # type: ignore[unresolved-attribute] else: df = DataFrame(mgr) return df @@ -1166,7 +1170,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): level = level.map(encoder) # ARROW-13756: if index is timezone aware DataTimeIndex elif pandas_dtype == "datetimetz": - tz = pa.lib.string_to_tzinfo( + tz = lib.string_to_tzinfo( column_indexes[0]['metadata']['timezone']) level = pd.to_datetime(level, utc=True).tz_convert(tz) if _pandas_api.is_ge_v3(): @@ -1234,7 +1238,7 @@ def _add_any_metadata(table, pandas_metadata): if idx != -1: if col_meta['pandas_type'] == 'datetimetz': col = table[idx] - if not isinstance(col.type, pa.lib.TimestampType): + if not isinstance(col.type, lib.TimestampType): continue metadata = col_meta['metadata'] if not metadata: @@ -1273,7 +1277,7 @@ def make_tz_aware(series, tz): """ Make a 
datetime64 Series timezone-aware for the given tz """ - tz = pa.lib.string_to_tzinfo(tz) + tz = lib.string_to_tzinfo(tz) series = (series.dt.tz_localize('utc') .dt.tz_convert(tz)) return series diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 5f9fdc7896d..edc7a2610eb 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -260,7 +260,8 @@ def test_pandas_parquet_configuration_options(tempdir): for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4', 'ZSTD']: if (compression != 'NONE' and - not pa.lib.Codec.is_available(compression)): + not pa.lib.Codec.is_available(compression)): \ + # type: ignore[unresolved-attribute] continue _write_table(arrow_table, filename, version='2.6', compression=compression) diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py index ac58792cd50..bbec49c5360 100644 --- a/python/pyarrow/tests/test_acero.py +++ b/python/pyarrow/tests/test_acero.py @@ -19,7 +19,8 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.compute import field +from pyarrow.compute import field, multiply, sum, equal, all as pc_all \ + # type: ignore[unresolved-import] try: from pyarrow.acero import ( @@ -121,7 +122,7 @@ def test_filter(table_source): ]) def test_filter_all_rows(source): # GH-46057: filtering all rows should return empty RecordBatch with same schema - result_expr = source.filter(pc.field("number") < 0) + result_expr = source.filter(field("number") < 0) assert result_expr.num_rows == 0 assert type(result_expr) is type(source) @@ -138,7 +139,7 @@ def test_project(table_source): # default name from expression decl = Declaration.from_sequence([ table_source, - Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)])) + Declaration("project", ProjectNodeOptions([multiply(field("a"), 2)])) ]) result = decl.to_table() assert result.schema.names == ["multiply(a, 2)"] @@ -147,7 +148,7 @@ def test_project(table_source): # provide name decl = Declaration.from_sequence([ table_source, - Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2"])) + Declaration("project", ProjectNodeOptions([multiply(field("a"), 2)], ["a2"])) ]) result = decl.to_table() assert result.schema.names == ["a2"] @@ -155,12 +156,12 @@ def test_project(table_source): # input validation with pytest.raises(ValueError): - ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2", "b2"]) + ProjectNodeOptions([multiply(field("a"), 2)], ["a2", "b2"]) # no scalar expression decl = Declaration.from_sequence([ table_source, - Declaration("project", ProjectNodeOptions([pc.sum(field("a"))])) + Declaration("project", ProjectNodeOptions([sum(field("a"))])) ]) with pytest.raises(ValueError, match="cannot Execute non-scalar expression"): _ = decl.to_table() @@ -370,7 +371,7 @@ def test_hash_join_with_residual_filter(): join_opts = HashJoinNodeOptions( "inner", left_keys="key", right_keys="key", - filter_expression=pc.equal(pc.field('a'), 5)) + filter_expression=equal(field('a'), 5)) joined = Declaration( "hashjoin", options=join_opts, inputs=[left_source, right_source]) result = joined.to_table() @@ -382,7 +383,7 @@ def test_hash_join_with_residual_filter(): # test filter expression referencing columns from both side join_opts = HashJoinNodeOptions( "left outer", left_keys="key", right_keys="key", - filter_expression=pc.equal(pc.field("a"), 5) | pc.equal(pc.field("b"), 10) + filter_expression=equal(field("a"), 5) | equal(field("b"), 10) ) 
joined = Declaration( "hashjoin", options=join_opts, inputs=[left_source, right_source]) @@ -487,10 +488,10 @@ def test_scan(tempdir): # projection scan option - scan_opts = ScanNodeOptions(dataset, columns={"a2": pc.multiply(field("a"), 2)}) + scan_opts = ScanNodeOptions(dataset, columns={"a2": multiply(field("a"), 2)}) decl = Declaration("scan", scan_opts) result = decl.to_table() # "a" is included in the result (needed later on for the actual projection) assert result["a"].to_pylist() == [1, 2, 3] # "b" is still included, but without data as it will be removed by the projection - assert pc.all(result["b"].is_null()).as_py() + assert pc_all(result["b"].is_null()).as_py() diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 92db9fc177a..6ab39dd8716 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -31,6 +31,10 @@ import numpy as np except ImportError: pass +try: + from pyarrow import lib # type: ignore[unresolved-import] +except ImportError: + pass import pyarrow as pa import pyarrow.tests.strategies as past @@ -323,7 +327,7 @@ def test_asarray(): np_arr = np.asarray([_ for _ in arr]) assert np_arr.tolist() == [0, 1, 2, 3] assert np_arr.dtype == np.dtype('O') - assert isinstance(np_arr[0], pa.lib.Int64Value) + assert isinstance(np_arr[0], lib.Int64Value) # Calling with the arrow array gives back an array with 'int64' dtype np_arr = np.asarray(arr) @@ -1908,9 +1912,9 @@ def test_cast_from_null(): out_types = [ pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE), + pa.field('b', pa.string())], mode=lib.UnionMode_DENSE), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), ] in_arr = pa.array(in_data, type=pa.null()) for out_type in out_types: @@ -3223,8 +3227,8 @@ def test_struct_array_field(): x2 = a.field('x') y2 = a.field('y') - assert isinstance(x0, pa.lib.Int16Array) - assert isinstance(y1, pa.lib.FloatArray) + assert isinstance(x0, lib.Int16Array) + assert isinstance(y1, lib.FloatArray) assert x0.equals(pa.array([1, 3, 5], type=pa.int16())) assert y0.equals(pa.array([2.5, 4.5, 6.5], type=pa.float32())) assert x0.equals(x1) @@ -3258,8 +3262,8 @@ def test_struct_array_flattened_field(): x2 = a._flattened_field('x') y2 = a._flattened_field('y') - assert isinstance(x0, pa.lib.Int16Array) - assert isinstance(y1, pa.lib.FloatArray) + assert isinstance(x0, lib.Int16Array) + assert isinstance(y1, lib.FloatArray) assert x0.equals(pa.array([1, None, 5], type=pa.int16())) assert y0.equals(pa.array([2.5, None, 6.5], type=pa.float32())) assert x0.equals(x1) @@ -3307,7 +3311,7 @@ def test_empty_cast(): # ARROW-4766: Ensure that supported types conversion don't segfault # on empty arrays of common types pa.array([], type=t1).cast(t2) - except (pa.lib.ArrowNotImplementedError, pa.ArrowInvalid): + except (lib.ArrowNotImplementedError, pa.ArrowInvalid): continue @@ -4103,7 +4107,7 @@ def test_list_view_from_arrays_fails(list_array_type, list_type_factory): mask = pa.array([False, False, True]) # Ambiguous to specify both validity map and offsets or sizes with nulls - with pytest.raises(pa.lib.ArrowInvalid): + with pytest.raises(lib.ArrowInvalid): list_array_type.from_arrays(offsets, sizes, values, mask=mask) offsets = [0, 1, 1] @@ -4111,7 +4115,7 @@ def test_list_view_from_arrays_fails(list_array_type, list_type_factory): array_slice = array[1:] # List offsets and sizes 
must not be slices if a validity map is specified - with pytest.raises(pa.lib.ArrowInvalid): + with pytest.raises(lib.ArrowInvalid): list_array_type.from_arrays( array_slice.offsets, array_slice.sizes, array_slice.values, mask=array_slice.is_null()) diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 60f3a5621b9..306225dbf69 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -676,7 +676,8 @@ def test_roundtrip_reader_capsule(constructor): obj = constructor(schema, batches) bad_schema = pa.schema({'ints': pa.int32()}) - with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"): + with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"): \ + # type: ignore[unresolved-attribute] obj.__arrow_c_stream__(bad_schema.__arrow_c_schema__()) # Can work with matching schema diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 4e39383473c..e9afe643994 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -27,6 +27,41 @@ import random import sys import textwrap +from pyarrow import lib # type: ignore[unresolved-import] +from pyarrow.compute import \ + sum as pc_sum, mode, variance, skew, kurtosis, count_substring, \ + count_substring_regex, find_substring, find_substring_regex, match_like, \ + match_substring, match_substring_regex, utf8_trim_whitespace, \ + ascii_trim_whitespace, utf8_trim, utf8_slice_codeunits, binary_slice, \ + split_pattern, utf8_split_whitespace, ascii_split_whitespace, \ + split_pattern_regex, any as pc_any, all as pc_all, filter, min_max, \ + choose, utf8_is_printable, ascii_center, ascii_lpad, ascii_rpad, utf8_center, \ + utf8_lpad, utf8_rpad, binary_replace_slice, utf8_replace_slice, \ + replace_substring, replace_substring_regex, extract_regex, extract_regex_span, \ + binary_join, binary_join_element_wise, not_equal, less, less_equal, greater, \ + greater_equal, equal, round_to_multiple, round_binary, is_null, or_kleene, \ + is_valid, and_, and_kleene, or_, xor, invert, dictionary_decode, \ + dictionary_encode, strptime, strftime, year as pc_year, \ + is_leap_year as pc_is_leap_year, month as pc_month, day as pc_day, \ + day_of_year as pc_day_of_year, iso_year as pc_iso_year, iso_week as pc_iso_week, \ + iso_calendar as pc_iso_calendar, quarter as pc_quarter, hour as pc_hour, \ + minute as pc_minute, second as pc_second, millisecond as pc_millisecond, \ + microsecond as pc_microsecond, nanosecond as pc_nanosecond, \ + subsecond as pc_subsecond, local_timestamp as pc_local_timestamp, \ + is_dst as pc_is_dst, day_of_week as pc_day_of_week, \ + week as pc_week, \ + assume_timezone as pc_assume_timezone, count, ceil_temporal, floor_temporal, \ + round_temporal, partition_nth_indices, select_k_unstable, array_sort_indices, \ + sort_indices, is_in, index_in, quantile, tdigest, cumulative_sum, \ + cumulative_prod, max_element_wise, min_element_wise, cumulative_min, \ + cumulative_max, map_lookup, struct_field, case_when, make_struct, list_element, \ + count_distinct, utf8_normalize, rank, rank_quantile, rank_normal, negate, \ + subtract, divide, multiply, power, sqrt, exp, cos, sin, tan, acos, atan, \ + asin, atan2, sinh, cosh, tanh, asinh, acosh, atanh, abs as pc_abs, sign, \ + bit_wise_not, bit_wise_and, \ + bit_wise_or, bit_wise_xor, is_nan, is_finite, coalesce, hour, round as pc_round, \ + add as pc_add, cast, list_slice, run_end_decode, run_end_encode, pairwise_diff, \ + 
pairwise_diff_checked, pivot_wider, winsorize # type: ignore[unresolved-import] try: import numpy as np @@ -324,36 +359,36 @@ def test_function_attributes(): def test_input_type_conversion(): # Automatic array conversion from Python - arr = pc.add([1, 2], [4, None]) + arr = pc_add([1, 2], [4, None]) assert arr.to_pylist() == [5, None] # Automatic scalar conversion from Python - arr = pc.add([1, 2], 4) + arr = pc_add([1, 2], 4) assert arr.to_pylist() == [5, 6] # Other scalar type - assert pc.equal(["foo", "bar", None], - "foo").to_pylist() == [True, False, None] + assert equal(["foo", "bar", None], + "foo").to_pylist() == [True, False, None] @pytest.mark.parametrize('arrow_type', numerical_arrow_types) def test_sum_array(arrow_type): arr = pa.array([1, 2, 3, 4], type=arrow_type) assert arr.sum().as_py() == 10 - assert pc.sum(arr).as_py() == 10 + assert pc_sum(arr).as_py() == 10 arr = pa.array([1, 2, 3, 4, None], type=arrow_type) assert arr.sum().as_py() == 10 - assert pc.sum(arr).as_py() == 10 + assert pc_sum(arr).as_py() == 10 arr = pa.array([None], type=arrow_type) assert arr.sum().as_py() is None # noqa: E711 - assert pc.sum(arr).as_py() is None # noqa: E711 + assert pc_sum(arr).as_py() is None # noqa: E711 assert arr.sum(min_count=0).as_py() == 0 - assert pc.sum(arr, min_count=0).as_py() == 0 + assert pc_sum(arr, min_count=0).as_py() == 0 arr = pa.array([], type=arrow_type) assert arr.sum().as_py() is None # noqa: E711 assert arr.sum(min_count=0).as_py() == 0 - assert pc.sum(arr, min_count=0).as_py() == 0 + assert pc_sum(arr, min_count=0).as_py() == 0 @pytest.mark.parametrize("arrow_type", [pa.decimal128(3, 2), pa.decimal256(3, 2)]) @@ -402,24 +437,24 @@ def test_sum_decimal_array(arrow_type): @pytest.mark.parametrize('arrow_type', numerical_arrow_types) def test_sum_chunked_array(arrow_type): arr = pa.chunked_array([pa.array([1, 2, 3, 4], type=arrow_type)]) - assert pc.sum(arr).as_py() == 10 + assert pc_sum(arr).as_py() == 10 arr = pa.chunked_array([ pa.array([1, 2], type=arrow_type), pa.array([3, 4], type=arrow_type) ]) - assert pc.sum(arr).as_py() == 10 + assert pc_sum(arr).as_py() == 10 arr = pa.chunked_array([ pa.array([1, 2], type=arrow_type), pa.array([], type=arrow_type), pa.array([3, 4], type=arrow_type) ]) - assert pc.sum(arr).as_py() == 10 + assert pc_sum(arr).as_py() == 10 arr = pa.chunked_array((), type=arrow_type) assert arr.num_chunks == 0 - assert pc.sum(arr).as_py() is None # noqa: E711 - assert pc.sum(arr, min_count=0).as_py() == 0 + assert pc_sum(arr).as_py() is None # noqa: E711 + assert pc_sum(arr, min_count=0).as_py() == 0 @pytest.mark.parametrize('arrow_type', [pa.decimal128(3, 2), pa.decimal256(3, 2)]) @@ -438,77 +473,77 @@ def test_sum_chunked_array_decimal_type(arrow_type): pa.array([Decimal("1.23"), Decimal("4.56")], type=arrow_type) ] ) - assert pc.sum(arr).as_py() == expected_sum - assert pc.sum(arr).type == max_precision_type + assert pc_sum(arr).as_py() == expected_sum + assert pc_sum(arr).type == max_precision_type arr = pa.chunked_array([ pa.array([Decimal("1.23")], type=arrow_type), pa.array([Decimal("4.56")], type=arrow_type) ]) - assert pc.sum(arr).as_py() == expected_sum - assert pc.sum(arr).type == max_precision_type + assert pc_sum(arr).as_py() == expected_sum + assert pc_sum(arr).type == max_precision_type arr = pa.chunked_array([ pa.array([Decimal("1.23")], type=arrow_type), pa.array([], type=arrow_type), pa.array([Decimal("4.56")], type=arrow_type) ]) - assert pc.sum(arr).as_py() == expected_sum - assert pc.sum(arr).type == max_precision_type + 
assert pc_sum(arr).as_py() == expected_sum + assert pc_sum(arr).type == max_precision_type arr = pa.chunked_array((), type=arrow_type) assert arr.num_chunks == 0 - assert pc.sum(arr).as_py() is None # noqa: E711 - assert pc.sum(arr).type == max_precision_type - assert pc.sum(arr, min_count=0).as_py() == zero - assert pc.sum(arr, min_count=0).type == max_precision_type + assert pc_sum(arr).as_py() is None # noqa: E711 + assert pc_sum(arr).type == max_precision_type + assert pc_sum(arr, min_count=0).as_py() == zero + assert pc_sum(arr, min_count=0).type == max_precision_type def test_mode_array(): # ARROW-9917 - arr = pa.array([1, 1, 3, 4, 3, 5], type='int64') - mode = pc.mode(arr) - assert len(mode) == 1 - assert mode[0].as_py() == {"mode": 1, "count": 2} - - mode = pc.mode(arr, n=2) - assert len(mode) == 2 - assert mode[0].as_py() == {"mode": 1, "count": 2} - assert mode[1].as_py() == {"mode": 3, "count": 2} - - arr = pa.array([], type='int64') - assert len(pc.mode(arr)) == 0 - - arr = pa.array([1, 1, 3, 4, 3, None], type='int64') - mode = pc.mode(arr, skip_nulls=False) - assert len(mode) == 0 - mode = pc.mode(arr, min_count=6) - assert len(mode) == 0 - mode = pc.mode(arr, skip_nulls=False, min_count=5) - assert len(mode) == 0 - - arr = pa.array([True, False]) - mode = pc.mode(arr, n=2) - assert len(mode) == 2 - assert mode[0].as_py() == {"mode": False, "count": 1} - assert mode[1].as_py() == {"mode": True, "count": 1} + data = pa.array([1, 1, 3, 4, 3, 5], type='int64') + arr = mode(data) + assert len(arr) == 1 + assert arr[0].as_py() == {"mode": 1, "count": 2} + + arr = mode(data, n=2) + assert len(arr) == 2 + assert arr[0].as_py() == {"mode": 1, "count": 2} + assert arr[1].as_py() == {"mode": 3, "count": 2} + + data = pa.array([], type='int64') + assert len(mode(data)) == 0 + + data = pa.array([1, 1, 3, 4, 3, None], type='int64') + arr = mode(data, skip_nulls=False) + assert len(arr) == 0 + arr = mode(data, min_count=6) + assert len(arr) == 0 + arr = mode(data, skip_nulls=False, min_count=5) + assert len(arr) == 0 + + data = pa.array([True, False]) + arr = mode(data, n=2) + assert len(arr) == 2 + assert arr[0].as_py() == {"mode": False, "count": 1} + assert arr[1].as_py() == {"mode": True, "count": 1} def test_mode_chunked_array(): # ARROW-9917 - arr = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')]) - mode = pc.mode(arr) - assert len(mode) == 1 - assert mode[0].as_py() == {"mode": 1, "count": 2} + data = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')]) + arr = mode(data) + assert len(arr) == 1 + assert arr[0].as_py() == {"mode": 1, "count": 2} - mode = pc.mode(arr, n=2) - assert len(mode) == 2 - assert mode[0].as_py() == {"mode": 1, "count": 2} - assert mode[1].as_py() == {"mode": 3, "count": 2} + arr = mode(data, n=2) + assert len(arr) == 2 + assert arr[0].as_py() == {"mode": 1, "count": 2} + assert arr[1].as_py() == {"mode": 3, "count": 2} arr = pa.chunked_array((), type='int64') assert arr.num_chunks == 0 - assert len(pc.mode(arr)) == 0 + assert len(mode(arr)) == 0 def test_empty_chunked_array(): @@ -521,23 +556,23 @@ def test_empty_chunked_array(): def test_variance(): data = [1, 2, 3, 4, 5, 6, 7, 8] - assert pc.variance(data).as_py() == 5.25 - assert pc.variance(data, ddof=0).as_py() == 5.25 - assert pc.variance(data, ddof=1).as_py() == 6.0 + assert variance(data).as_py() == 5.25 + assert variance(data, ddof=0).as_py() == 5.25 + assert variance(data, ddof=1).as_py() == 6.0 def test_skew(): data = [1, 1, None, 2] - assert pc.skew(data).as_py() == 
pytest.approx(0.707106781186548, rel=1e-10) - assert pc.skew(data, skip_nulls=False).as_py() is None - assert pc.skew(data, min_count=4).as_py() is None + assert skew(data).as_py() == pytest.approx(0.707106781186548, rel=1e-10) + assert skew(data, skip_nulls=False).as_py() is None + assert skew(data, min_count=4).as_py() is None def test_kurtosis(): data = [1, 1, None, 2] - assert pc.kurtosis(data).as_py() == pytest.approx(-1.5, rel=1e-10) - assert pc.kurtosis(data, skip_nulls=False).as_py() is None - assert pc.kurtosis(data, min_count=4).as_py() is None + assert kurtosis(data).as_py() == pytest.approx(-1.5, rel=1e-10) + assert kurtosis(data, skip_nulls=False).as_py() is None + assert kurtosis(data, min_count=4).as_py() is None @pytest.mark.parametrize("input, expected", ( @@ -550,8 +585,8 @@ def test_kurtosis(): ([1, 40], {'skew': None, 'kurtosis': None}), )) def test_unbiased_skew_and_kurtosis(input, expected): - arrow_skew = pc.skew(input, skip_nulls=True, biased=False) - arrow_kurtosis = pc.kurtosis(input, skip_nulls=True, biased=False) + arrow_skew = skew(input, skip_nulls=True, biased=False) + arrow_kurtosis = kurtosis(input, skip_nulls=True, biased=False) assert arrow_skew.as_py() == expected['skew'] assert arrow_kurtosis.as_py() == expected['kurtosis'] @@ -561,11 +596,11 @@ def test_count_substring(): (pa.large_string(), pa.int64())]: arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None], type=ty) - result = pc.count_substring(arr, "ab") + result = count_substring(arr, "ab") expected = pa.array([1, 1, 2, 0, 0, None], type=offset) assert expected == result - result = pc.count_substring(arr, "ab", ignore_case=True) + result = count_substring(arr, "ab", ignore_case=True) expected = pa.array([1, 1, 2, 0, 1, None], type=offset) assert expected == result @@ -575,11 +610,11 @@ def test_count_substring_regex(): (pa.large_string(), pa.int64())]: arr = pa.array(["ab", "cab", "baAacaa", "ba", "AB", None], type=ty) - result = pc.count_substring_regex(arr, "a+") + result = count_substring_regex(arr, "a+") expected = pa.array([1, 1, 3, 1, 0, None], type=offset) assert expected.equals(result) - result = pc.count_substring_regex(arr, "a+", ignore_case=True) + result = count_substring_regex(arr, "a+", ignore_case=True) expected = pa.array([1, 1, 2, 1, 1, None], type=offset) assert expected.equals(result) @@ -587,61 +622,61 @@ def test_count_substring_regex(): def test_find_substring(): for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]: arr = pa.array(["ab", "cab", "ba", None], type=ty) - result = pc.find_substring(arr, "ab") + result = find_substring(arr, "ab") assert result.to_pylist() == [0, 1, -1, None] - result = pc.find_substring_regex(arr, "a?b") + result = find_substring_regex(arr, "a?b") assert result.to_pylist() == [0, 1, 0, None] arr = pa.array(["ab*", "cAB*", "ba", "aB?"], type=ty) - result = pc.find_substring(arr, "aB*", ignore_case=True) + result = find_substring(arr, "aB*", ignore_case=True) assert result.to_pylist() == [0, 1, -1, -1] - result = pc.find_substring_regex(arr, "a?b", ignore_case=True) + result = find_substring_regex(arr, "a?b", ignore_case=True) assert result.to_pylist() == [0, 1, 0, 0] def test_match_like(): arr = pa.array(["ab", "ba%", "ba", "ca%d", None]) - result = pc.match_like(arr, r"_a\%%") + result = match_like(arr, r"_a\%%") expected = pa.array([False, True, False, True, None]) assert expected.equals(result) arr = pa.array(["aB", "bA%", "ba", "ca%d", None]) - result = pc.match_like(arr, r"_a\%%", ignore_case=True) + result = 
match_like(arr, r"_a\%%", ignore_case=True) expected = pa.array([False, True, False, True, None]) assert expected.equals(result) - result = pc.match_like(arr, r"_a\%%", ignore_case=False) + result = match_like(arr, r"_a\%%", ignore_case=False) expected = pa.array([False, False, False, True, None]) assert expected.equals(result) def test_match_substring(): arr = pa.array(["ab", "abc", "ba", None]) - result = pc.match_substring(arr, "ab") + result = match_substring(arr, "ab") expected = pa.array([True, True, False, None]) assert expected.equals(result) arr = pa.array(["áB", "Ábc", "ba", None]) - result = pc.match_substring(arr, "áb", ignore_case=True) + result = match_substring(arr, "áb", ignore_case=True) expected = pa.array([True, True, False, None]) assert expected.equals(result) - result = pc.match_substring(arr, "áb", ignore_case=False) + result = match_substring(arr, "áb", ignore_case=False) expected = pa.array([False, False, False, None]) assert expected.equals(result) def test_match_substring_regex(): arr = pa.array(["ab", "abc", "ba", "c", None]) - result = pc.match_substring_regex(arr, "^a?b") + result = match_substring_regex(arr, "^a?b") expected = pa.array([True, True, True, False, None]) assert expected.equals(result) arr = pa.array(["aB", "Abc", "BA", "c", None]) - result = pc.match_substring_regex(arr, "^a?b", ignore_case=True) + result = match_substring_regex(arr, "^a?b", ignore_case=True) expected = pa.array([True, True, True, False, None]) assert expected.equals(result) - result = pc.match_substring_regex(arr, "^a?b", ignore_case=False) + result = match_substring_regex(arr, "^a?b", ignore_case=False) expected = pa.array([False, False, False, False, None]) assert expected.equals(result) @@ -649,21 +684,21 @@ def test_match_substring_regex(): def test_trim(): # \u3000 is unicode whitespace arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = pc.utf8_trim_whitespace(arr) + result = utf8_trim_whitespace(arr) expected = pa.array(["foo", None, "foo bar"]) assert expected.equals(result) arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = pc.ascii_trim_whitespace(arr) + result = ascii_trim_whitespace(arr) expected = pa.array(["foo", None, "\u3000foo bar"]) assert expected.equals(result) arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = pc.utf8_trim(arr, characters=' f\u3000') + result = utf8_trim(arr, characters=' f\u3000') expected = pa.array(["oo", None, "oo bar \t"]) assert expected.equals(result) # Positional option - result = pc.utf8_trim(arr, ' f\u3000') + result = utf8_trim(arr, ' f\u3000') expected = pa.array(["oo", None, "oo bar \t"]) assert expected.equals(result) @@ -675,12 +710,12 @@ def test_slice_compatibility(): for step in [-3, -2, -1, 1, 2, 3]: expected = pa.array([k.as_py()[start:stop:step] for k in arr]) - result = pc.utf8_slice_codeunits( + result = utf8_slice_codeunits( arr, start=start, stop=stop, step=step) assert expected.equals(result) # Positional options - assert pc.utf8_slice_codeunits(arr, - start, stop, step) == result + assert utf8_slice_codeunits(arr, + start, stop, step) == result def test_binary_slice_compatibility(): @@ -693,113 +728,113 @@ def test_binary_slice_compatibility(): continue expected = pa.array([k.as_py()[start:stop:step] for k in arr]) - result = pc.binary_slice( + result = binary_slice( arr, start=start, stop=stop, step=step) assert expected.equals(result) # Positional options - assert pc.binary_slice(arr, start, stop, step) == result + assert binary_slice(arr, start, stop, step) == result # Fixed 
size binary input / output for item in data: fsb_scalar = pa.scalar(item, type=pa.binary(len(item))) expected = item[start:stop:step] - actual = pc.binary_slice(fsb_scalar, start, stop, step) + actual = binary_slice(fsb_scalar, start, stop, step) assert actual.type == pa.binary(len(expected)) assert actual.as_py() == expected def test_split_pattern(): arr = pa.array(["-foo---bar--", "---foo---b"]) - result = pc.split_pattern(arr, pattern="---") + result = split_pattern(arr, pattern="---") expected = pa.array([["-foo", "bar--"], ["", "foo", "b"]]) assert expected.equals(result) - result = pc.split_pattern(arr, "---", max_splits=1) + result = split_pattern(arr, "---", max_splits=1) expected = pa.array([["-foo", "bar--"], ["", "foo---b"]]) assert expected.equals(result) - result = pc.split_pattern(arr, "---", max_splits=1, reverse=True) + result = split_pattern(arr, "---", max_splits=1, reverse=True) expected = pa.array([["-foo", "bar--"], ["---foo", "b"]]) assert expected.equals(result) def test_split_whitespace_utf8(): arr = pa.array(["foo bar", " foo \u3000\tb"]) - result = pc.utf8_split_whitespace(arr) + result = utf8_split_whitespace(arr) expected = pa.array([["foo", "bar"], ["", "foo", "b"]]) assert expected.equals(result) - result = pc.utf8_split_whitespace(arr, max_splits=1) + result = utf8_split_whitespace(arr, max_splits=1) expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]]) assert expected.equals(result) - result = pc.utf8_split_whitespace(arr, max_splits=1, reverse=True) + result = utf8_split_whitespace(arr, max_splits=1, reverse=True) expected = pa.array([["foo", "bar"], [" foo", "b"]]) assert expected.equals(result) def test_split_whitespace_ascii(): arr = pa.array(["foo bar", " foo \u3000\tb"]) - result = pc.ascii_split_whitespace(arr) + result = ascii_split_whitespace(arr) expected = pa.array([["foo", "bar"], ["", "foo", "\u3000", "b"]]) assert expected.equals(result) - result = pc.ascii_split_whitespace(arr, max_splits=1) + result = ascii_split_whitespace(arr, max_splits=1) expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]]) assert expected.equals(result) - result = pc.ascii_split_whitespace(arr, max_splits=1, reverse=True) + result = ascii_split_whitespace(arr, max_splits=1, reverse=True) expected = pa.array([["foo", "bar"], [" foo \u3000", "b"]]) assert expected.equals(result) def test_split_pattern_regex(): arr = pa.array(["-foo---bar--", "---foo---b"]) - result = pc.split_pattern_regex(arr, pattern="-+") + result = split_pattern_regex(arr, pattern="-+") expected = pa.array([["", "foo", "bar", ""], ["", "foo", "b"]]) assert expected.equals(result) - result = pc.split_pattern_regex(arr, "-+", max_splits=1) + result = split_pattern_regex(arr, "-+", max_splits=1) expected = pa.array([["", "foo---bar--"], ["", "foo---b"]]) assert expected.equals(result) with pytest.raises(NotImplementedError, match="Cannot split in reverse with regex"): - result = pc.split_pattern_regex( + result = split_pattern_regex( arr, pattern="---", max_splits=1, reverse=True) def test_min_max(): # An example generated function wrapper with possible options data = [4, 5, 6, None, 1] - s = pc.min_max(data) + s = min_max(data) assert s.as_py() == {'min': 1, 'max': 6} - s = pc.min_max(data, options=pc.ScalarAggregateOptions()) + s = min_max(data, options=pc.ScalarAggregateOptions()) assert s.as_py() == {'min': 1, 'max': 6} - s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True)) + s = min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True)) assert s.as_py() == 
{'min': 1, 'max': 6} - s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False)) + s = min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False)) assert s.as_py() == {'min': None, 'max': None} # Options as dict of kwargs - s = pc.min_max(data, options={'skip_nulls': False}) + s = min_max(data, options={'skip_nulls': False}) assert s.as_py() == {'min': None, 'max': None} # Options as named functions arguments - s = pc.min_max(data, skip_nulls=False) + s = min_max(data, skip_nulls=False) assert s.as_py() == {'min': None, 'max': None} # Both options and named arguments with pytest.raises(TypeError): - s = pc.min_max( + s = min_max( data, options=pc.ScalarAggregateOptions(), skip_nulls=False) # Wrong options type options = pc.TakeOptions() with pytest.raises(TypeError): - s = pc.min_max(data, options=options) + s = min_max(data, options=options) # Missing argument with pytest.raises(TypeError, match="min_max takes 1 positional"): - s = pc.min_max() + s = min_max() def test_any(): @@ -808,17 +843,17 @@ def test_any(): options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0) a = pa.array([], type='bool') - assert pc.any(a).as_py() is None - assert pc.any(a, min_count=0).as_py() is False - assert pc.any(a, options=options).as_py() is False + assert pc_any(a).as_py() is None + assert pc_any(a, min_count=0).as_py() is False + assert pc_any(a, options=options).as_py() is False a = pa.array([False, None, True]) - assert pc.any(a).as_py() is True - assert pc.any(a, options=options).as_py() is True + assert pc_any(a).as_py() is True + assert pc_any(a, options=options).as_py() is True a = pa.array([False, None, False]) - assert pc.any(a).as_py() is False - assert pc.any(a, options=options).as_py() is None + assert pc_any(a).as_py() is False + assert pc_any(a, options=options).as_py() is None def test_all(): @@ -827,39 +862,39 @@ def test_all(): options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0) a = pa.array([], type='bool') - assert pc.all(a).as_py() is None - assert pc.all(a, min_count=0).as_py() is True - assert pc.all(a, options=options).as_py() is True + assert pc_all(a).as_py() is None + assert pc_all(a, min_count=0).as_py() is True + assert pc_all(a, options=options).as_py() is True a = pa.array([False, True]) - assert pc.all(a).as_py() is False - assert pc.all(a, options=options).as_py() is False + assert pc_all(a).as_py() is False + assert pc_all(a, options=options).as_py() is False a = pa.array([True, None]) - assert pc.all(a).as_py() is True - assert pc.all(a, options=options).as_py() is None + assert pc_all(a).as_py() is True + assert pc_all(a, options=options).as_py() is None a = pa.chunked_array([[True], [True, None]]) - assert pc.all(a).as_py() is True - assert pc.all(a, options=options).as_py() is None + assert pc_all(a).as_py() is True + assert pc_all(a, options=options).as_py() is None a = pa.chunked_array([[True], [False]]) - assert pc.all(a).as_py() is False - assert pc.all(a, options=options).as_py() is False + assert pc_all(a).as_py() is False + assert pc_all(a, options=options).as_py() is False def test_is_valid(): # An example generated function wrapper without options data = [4, 5, None] - assert pc.is_valid(data).to_pylist() == [True, True, False] + assert is_valid(data).to_pylist() == [True, True, False] with pytest.raises(TypeError): - pc.is_valid(data, options=None) + is_valid(data, options=None) def test_generated_docstrings(): # With options - assert pc.min_max.__doc__ == textwrap.dedent("""\ + assert min_max.__doc__ == 
textwrap.dedent("""\ Compute the minimum and maximum values of a numeric array. Null values are ignored by default. @@ -881,7 +916,7 @@ def test_generated_docstrings(): If not passed, will allocate memory from the default memory pool. """) # Without options - assert pc.add.__doc__ == textwrap.dedent("""\ + assert pc_add.__doc__ == textwrap.dedent("""\ Add the arguments element-wise. Results will wrap around on integer overflow. @@ -898,7 +933,7 @@ def test_generated_docstrings(): If not passed, will allocate memory from the default memory pool. """) # Varargs with options - assert pc.min_element_wise.__doc__ == textwrap.dedent("""\ + assert min_element_wise.__doc__ == textwrap.dedent("""\ Find the element-wise minimum value. Nulls are ignored (by default) or propagated. @@ -916,7 +951,7 @@ def test_generated_docstrings(): memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """) - assert pc.filter.__doc__ == textwrap.dedent("""\ + assert filter.__doc__ == textwrap.dedent("""\ Filter with a boolean selection filter. The output is populated with values from the input at positions @@ -963,24 +998,24 @@ def test_generated_signatures(): # options and their default values. # Without options - sig = inspect.signature(pc.add) + sig = inspect.signature(pc_add) assert str(sig) == "(x, y, /, *, memory_pool=None)" # With options - sig = inspect.signature(pc.min_max) + sig = inspect.signature(min_max) assert str(sig) == ("(array, /, *, skip_nulls=True, min_count=1, " "options=None, memory_pool=None)") # With positional options - sig = inspect.signature(pc.quantile) + sig = inspect.signature(quantile) assert str(sig) == ("(array, /, q=0.5, *, interpolation='linear', " "skip_nulls=True, min_count=0, " "options=None, memory_pool=None)") # Varargs with options - sig = inspect.signature(pc.binary_join_element_wise) + sig = inspect.signature(binary_join_element_wise) assert str(sig) == ("(*strings, null_handling='emit_null', " "null_replacement='', options=None, " "memory_pool=None)") # Varargs without options - sig = inspect.signature(pc.choose) + sig = inspect.signature(choose) assert str(sig) == "(indices, /, *values, memory_pool=None)" # Nullary with options sig = inspect.signature(pc.random) @@ -997,7 +1032,7 @@ def find_new_unicode_codepoints(): new = set() characters = [chr(c) for c in range(0x80, 0x11000) if not (0xD800 <= c < 0xE000)] - is_printable = pc.utf8_is_printable(pa.array(characters)).to_pylist() + is_printable = utf8_is_printable(pa.array(characters)).to_pylist() for i, c in enumerate(characters): if is_printable[i] != c.isprintable(): new.add(ord(c)) @@ -1117,20 +1152,20 @@ def test_string_py_compat_boolean(function_name, variant): def test_pad(): arr = pa.array([None, 'a', 'abcd']) - assert pc.ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd'] - assert pc.ascii_lpad(arr, width=3).tolist() == [None, ' a', 'abcd'] - assert pc.ascii_rpad(arr, width=3).tolist() == [None, 'a ', 'abcd'] - assert pc.ascii_center(arr, 3).tolist() == [None, ' a ', 'abcd'] - assert pc.ascii_lpad(arr, 3).tolist() == [None, ' a', 'abcd'] - assert pc.ascii_rpad(arr, 3).tolist() == [None, 'a ', 'abcd'] + assert ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd'] + assert ascii_lpad(arr, width=3).tolist() == [None, ' a', 'abcd'] + assert ascii_rpad(arr, width=3).tolist() == [None, 'a ', 'abcd'] + assert ascii_center(arr, 3).tolist() == [None, ' a ', 'abcd'] + assert ascii_lpad(arr, 3).tolist() == [None, ' a', 'abcd'] + assert ascii_rpad(arr, 
3).tolist() == [None, 'a ', 'abcd'] arr = pa.array([None, 'á', 'abcd']) - assert pc.utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd'] - assert pc.utf8_lpad(arr, width=3).tolist() == [None, ' á', 'abcd'] - assert pc.utf8_rpad(arr, width=3).tolist() == [None, 'á ', 'abcd'] - assert pc.utf8_center(arr, 3).tolist() == [None, ' á ', 'abcd'] - assert pc.utf8_lpad(arr, 3).tolist() == [None, ' á', 'abcd'] - assert pc.utf8_rpad(arr, 3).tolist() == [None, 'á ', 'abcd'] + assert utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd'] + assert utf8_lpad(arr, width=3).tolist() == [None, ' á', 'abcd'] + assert utf8_rpad(arr, width=3).tolist() == [None, 'á ', 'abcd'] + assert utf8_center(arr, 3).tolist() == [None, ' á ', 'abcd'] + assert utf8_lpad(arr, 3).tolist() == [None, ' á', 'abcd'] + assert utf8_rpad(arr, 3).tolist() == [None, 'á ', 'abcd'] def test_utf8_zfill(): @@ -1173,53 +1208,53 @@ def test_replace_slice(): for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') - actual = pc.binary_replace_slice( + actual = binary_replace_slice( arr, start=start, stop=stop, replacement='XX') assert actual.tolist() == expected.tolist() # Positional options - assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual + assert binary_replace_slice(arr, start, stop, 'XX') == actual arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde']) series = arr.to_pandas().astype(object).replace({np.nan: None}) for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') - actual = pc.utf8_replace_slice( + actual = utf8_replace_slice( arr, start=start, stop=stop, replacement='XX') assert actual.tolist() == expected.tolist() def test_replace_plain(): data = pa.array(['foozfoo', 'food', None]) - ar = pc.replace_substring(data, pattern='foo', replacement='bar') + ar = replace_substring(data, pattern='foo', replacement='bar') assert ar.tolist() == ['barzbar', 'bard', None] - ar = pc.replace_substring(data, 'foo', 'bar') + ar = replace_substring(data, 'foo', 'bar') assert ar.tolist() == ['barzbar', 'bard', None] - ar = pc.replace_substring(data, pattern='foo', replacement='bar', - max_replacements=1) + ar = replace_substring(data, pattern='foo', replacement='bar', + max_replacements=1) assert ar.tolist() == ['barzfoo', 'bard', None] - ar = pc.replace_substring(data, 'foo', 'bar', max_replacements=1) + ar = replace_substring(data, 'foo', 'bar', max_replacements=1) assert ar.tolist() == ['barzfoo', 'bard', None] def test_replace_regex(): data = pa.array(['foo', 'mood', None]) expected = ['f00', 'm00d', None] - ar = pc.replace_substring_regex(data, pattern='(.)oo', replacement=r'\100') + ar = replace_substring_regex(data, pattern='(.)oo', replacement=r'\100') assert ar.tolist() == expected - ar = pc.replace_substring_regex(data, '(.)oo', replacement=r'\100') + ar = replace_substring_regex(data, '(.)oo', replacement=r'\100') assert ar.tolist() == expected - ar = pc.replace_substring_regex(data, '(.)oo', r'\100') + ar = replace_substring_regex(data, '(.)oo', r'\100') assert ar.tolist() == expected def test_extract_regex(): ar = pa.array(['a1', 'zb2z']) expected = [{'letter': 'a', 'digit': '1'}, {'letter': 'b', 'digit': '2'}] - struct = pc.extract_regex(ar, pattern=r'(?P[ab])(?P\d)') + struct = extract_regex(ar, pattern=r'(?P[ab])(?P\d)') assert struct.tolist() == expected - struct = pc.extract_regex(ar, r'(?P[ab])(?P\d)') + struct = extract_regex(ar, r'(?P[ab])(?P\d)') assert struct.tolist() == expected @@ -1227,50 +1262,50 @@ def 
test_extract_regex_span(): ar = pa.array(['a1', 'zb234z']) expected = [{'letter': [0, 1], 'digit': [1, 1]}, {'letter': [1, 1], 'digit': [2, 3]}] - struct = pc.extract_regex_span(ar, pattern=r'(?P[ab])(?P\d+)') + struct = extract_regex_span(ar, pattern=r'(?P[ab])(?P\d+)') assert struct.tolist() == expected - struct = pc.extract_regex_span(ar, r'(?P[ab])(?P\d+)') + struct = extract_regex_span(ar, r'(?P[ab])(?P\d+)') assert struct.tolist() == expected def test_binary_join(): ar_list = pa.array([['foo', 'bar'], None, []]) expected = pa.array(['foo-bar', None, '']) - assert pc.binary_join(ar_list, '-').equals(expected) + assert binary_join(ar_list, '-').equals(expected) separator_array = pa.array(['1', '2'], type=pa.binary()) expected = pa.array(['a1b', 'c2d'], type=pa.binary()) ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary())) - assert pc.binary_join(ar_list, separator_array).equals(expected) + assert binary_join(ar_list, separator_array).equals(expected) def test_binary_join_element_wise(): null = pa.scalar(None, type=pa.string()) arrs = [[None, 'a', 'b'], ['c', None, 'd'], [None, '-', '--']] - assert pc.binary_join_element_wise(*arrs).to_pylist() == \ + assert binary_join_element_wise(*arrs).to_pylist() == \ [None, None, 'b--d'] - assert pc.binary_join_element_wise('a', 'b', '-').as_py() == 'a-b' - assert pc.binary_join_element_wise('a', null, '-').as_py() is None - assert pc.binary_join_element_wise('a', 'b', null).as_py() is None + assert binary_join_element_wise('a', 'b', '-').as_py() == 'a-b' + assert binary_join_element_wise('a', null, '-').as_py() is None + assert binary_join_element_wise('a', 'b', null).as_py() is None skip = pc.JoinOptions(null_handling='skip') - assert pc.binary_join_element_wise(*arrs, options=skip).to_pylist() == \ + assert binary_join_element_wise(*arrs, options=skip).to_pylist() == \ [None, 'a', 'b--d'] - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', 'b', '-', options=skip).as_py() == 'a-b' - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', null, '-', options=skip).as_py() == 'a' - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', 'b', null, options=skip).as_py() is None replace = pc.JoinOptions(null_handling='replace', null_replacement='spam') - assert pc.binary_join_element_wise(*arrs, options=replace).to_pylist() == \ + assert binary_join_element_wise(*arrs, options=replace).to_pylist() == \ [None, 'a-spam', 'b--d'] - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', 'b', '-', options=replace).as_py() == 'a-b' - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', null, '-', options=replace).as_py() == 'a-spam' - assert pc.binary_join_element_wise( + assert binary_join_element_wise( 'a', 'b', null, options=replace).as_py() is None @@ -1598,22 +1633,22 @@ def con(values): arr1 = con([1, 2, 3, 4, None]) arr2 = con([1, 1, 4, None, 4]) - result = pc.equal(arr1, arr2) + result = equal(arr1, arr2) assert result.equals(con([True, False, False, None, None])) - result = pc.not_equal(arr1, arr2) + result = not_equal(arr1, arr2) assert result.equals(con([False, True, True, None, None])) - result = pc.less(arr1, arr2) + result = less(arr1, arr2) assert result.equals(con([False, False, True, None, None])) - result = pc.less_equal(arr1, arr2) + result = less_equal(arr1, arr2) assert result.equals(con([True, False, True, None, None])) - result = pc.greater(arr1, arr2) + result = greater(arr1, arr2) assert 
result.equals(con([False, True, False, None, None])) - result = pc.greater_equal(arr1, arr2) + result = greater_equal(arr1, arr2) assert result.equals(con([True, True, False, None, None])) @@ -1629,28 +1664,28 @@ def con(values): arr = con(['a', 'b', 'c', None]) scalar = pa.scalar('b') - result = pc.equal(arr, scalar) + result = equal(arr, scalar) assert result.equals(con([False, True, False, None])) if typ == "array": nascalar = pa.scalar(None, type="string") - result = pc.equal(arr, nascalar) - isnull = pc.is_null(result) + result = equal(arr, nascalar) + isnull = is_null(result) assert isnull.equals(con([True, True, True, True])) - result = pc.not_equal(arr, scalar) + result = not_equal(arr, scalar) assert result.equals(con([True, False, True, None])) - result = pc.less(arr, scalar) + result = less(arr, scalar) assert result.equals(con([True, False, False, None])) - result = pc.less_equal(arr, scalar) + result = less_equal(arr, scalar) assert result.equals(con([True, True, False, None])) - result = pc.greater(arr, scalar) + result = greater(arr, scalar) assert result.equals(con([False, False, True, None])) - result = pc.greater_equal(arr, scalar) + result = greater_equal(arr, scalar) assert result.equals(con([False, True, True, None])) @@ -1666,27 +1701,27 @@ def con(values): arr = con([1, 2, 3, None]) scalar = pa.scalar(2) - result = pc.equal(arr, scalar) + result = equal(arr, scalar) assert result.equals(con([False, True, False, None])) if typ == "array": nascalar = pa.scalar(None, type="int64") - result = pc.equal(arr, nascalar) + result = equal(arr, nascalar) assert result.to_pylist() == [None, None, None, None] - result = pc.not_equal(arr, scalar) + result = not_equal(arr, scalar) assert result.equals(con([True, False, True, None])) - result = pc.less(arr, scalar) + result = less(arr, scalar) assert result.equals(con([True, False, False, None])) - result = pc.less_equal(arr, scalar) + result = less_equal(arr, scalar) assert result.equals(con([True, True, False, None])) - result = pc.greater(arr, scalar) + result = greater(arr, scalar) assert result.equals(con([False, False, True, None])) - result = pc.greater_equal(arr, scalar) + result = greater_equal(arr, scalar) assert result.equals(con([False, True, True, None])) @@ -1702,14 +1737,14 @@ def test_compare_chunked_array_mixed(): (arr_chunked, arr), (arr_chunked, arr_chunked2), ]: - result = pc.equal(left, right) + result = equal(left, right) assert result.equals(expected) def test_arithmetic_add(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = pc.add(left, right) + result = pc_add(left, right) expected = pa.array([1, 1, 4, 6, 8]) assert result.equals(expected) @@ -1717,7 +1752,7 @@ def test_arithmetic_add(): def test_arithmetic_subtract(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = pc.subtract(left, right) + result = subtract(left, right) expected = pa.array([1, 3, 2, 2, 2]) assert result.equals(expected) @@ -1725,7 +1760,7 @@ def test_arithmetic_subtract(): def test_arithmetic_multiply(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = pc.multiply(left, right) + result = multiply(left, right) expected = pa.array([0, -2, 3, 8, 15]) assert result.equals(expected) @@ -1733,10 +1768,10 @@ def test_arithmetic_multiply(): @pytest.mark.parametrize("ty", ["round", "round_to_multiple"]) def test_round_to_integer(ty): if ty == "round": - round_func = pc.round + round_func = pc_round RoundOptions = partial(pc.RoundOptions, ndigits=0) elif ty 
== "round_to_multiple": - round_func = pc.round_to_multiple + round_func = round_to_multiple RoundOptions = partial(pc.RoundToMultipleOptions, multiple=1) values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None] @@ -1771,11 +1806,11 @@ def test_round(): } for ndigits, expected in ndigits_and_expected.items(): options = pc.RoundOptions(ndigits, "half_towards_infinity") - result = pc.round(values, options=options) + result = pc_round(values, options=options) np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) - assert pc.round(values, ndigits, + assert pc_round(values, ndigits, round_mode="half_towards_infinity") == result - assert pc.round(values, ndigits, "half_towards_infinity") == result + assert pc_round(values, ndigits, "half_towards_infinity") == result @pytest.mark.numpy @@ -1791,19 +1826,19 @@ def test_round_to_multiple(): } for multiple, expected in multiple_and_expected.items(): options = pc.RoundToMultipleOptions(multiple, "half_towards_infinity") - result = pc.round_to_multiple(values, options=options) + result = round_to_multiple(values, options=options) np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) - assert pc.round_to_multiple(values, multiple, - "half_towards_infinity") == result + assert round_to_multiple(values, multiple, + "half_towards_infinity") == result for multiple in [0, -2, pa.scalar(-10.4)]: with pytest.raises(pa.ArrowInvalid, match="Rounding multiple must be positive"): - pc.round_to_multiple(values, multiple=multiple) + round_to_multiple(values, multiple=multiple) for multiple in [object, 99999999999999999999999]: with pytest.raises(TypeError, match="is not a valid multiple type"): - pc.round_to_multiple(values, multiple=multiple) + round_to_multiple(values, multiple=multiple) def test_round_binary(): @@ -1811,15 +1846,15 @@ def test_round_binary(): scales = pa.array([-3, -2, -1, 0, 1, 2, 3], pa.int32()) expected = pa.array( [0, 200, 350, 457, 123.5, 234.57, 345.678], pa.float64()) - assert pc.round_binary(values, scales) == expected + assert round_binary(values, scales) == expected expect_zero = pa.scalar(0, pa.float64()) expect_inf = pa.scalar(10, pa.float64()) scale = pa.scalar(-1, pa.int32()) - assert pc.round_binary( + assert round_binary( 5.0, scale, round_mode="half_towards_zero") == expect_zero - assert pc.round_binary( + assert round_binary( 5.0, scale, round_mode="half_towards_infinity") == expect_inf @@ -1828,11 +1863,11 @@ def test_is_null(): result = arr.is_null() expected = pa.array([False, False, False, True]) assert result.equals(expected) - assert result.equals(pc.is_null(arr)) + assert result.equals(is_null(arr)) result = arr.is_valid() expected = pa.array([True, True, True, False]) assert result.equals(expected) - assert result.equals(pc.is_valid(arr)) + assert result.equals(is_valid(arr)) arr = pa.chunked_array([[1, 2], [3, None]]) result = arr.is_null() @@ -1952,27 +1987,27 @@ def test_logical(): a = pa.array([True, False, False, None]) b = pa.array([True, True, False, True]) - assert pc.and_(a, b) == pa.array([True, False, False, None]) - assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) + assert and_(a, b) == pa.array([True, False, False, None]) + assert and_kleene(a, b) == pa.array([True, False, False, None]) - assert pc.or_(a, b) == pa.array([True, True, False, None]) - assert pc.or_kleene(a, b) == pa.array([True, True, False, True]) + assert or_(a, b) == pa.array([True, True, False, None]) + assert or_kleene(a, b) == pa.array([True, True, False, True]) - assert pc.xor(a, b) == 
pa.array([False, True, False, None]) + assert xor(a, b) == pa.array([False, True, False, None]) - assert pc.invert(a) == pa.array([False, True, True, None]) + assert invert(a) == pa.array([False, True, True, None]) def test_dictionary_decode(): array = pa.array(["a", "a", "b", "c", "b"]) dictionary_array = array.dictionary_encode() - dictionary_array_decode = pc.dictionary_decode(dictionary_array) + dictionary_array_decode = dictionary_decode(dictionary_array) assert array != dictionary_array assert array == dictionary_array_decode - assert array == pc.dictionary_decode(array) - assert pc.dictionary_encode(dictionary_array) == dictionary_array + assert array == dictionary_decode(array) + assert dictionary_encode(dictionary_array) == dictionary_array def test_cast(): @@ -2049,7 +2084,7 @@ def test_fsl_to_fsl_cast(value_type): # Different sized FSL cast_type = pa.list_(pa.field("element", value_type), 3) err_msg = 'Size of FixedSizeList is not the same.' - with pytest.raises(pa.lib.ArrowTypeError, match=err_msg): + with pytest.raises(lib.ArrowTypeError, match=err_msg): fsl.cast(cast_type) @@ -2247,28 +2282,28 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): def test_strptime(): arr = pa.array(["5/1/2020", None, "12/13/1900"]) - got = pc.strptime(arr, format='%m/%d/%Y', unit='s') + got = strptime(arr, format='%m/%d/%Y', unit='s') expected = pa.array( [datetime.datetime(2020, 5, 1), None, datetime.datetime(1900, 12, 13)], type=pa.timestamp('s')) assert got == expected # Positional format - assert pc.strptime(arr, '%m/%d/%Y', unit='s') == got + assert strptime(arr, '%m/%d/%Y', unit='s') == got expected = pa.array([datetime.datetime(2020, 1, 5), None, None], type=pa.timestamp('s')) - got = pc.strptime(arr, format='%d/%m/%Y', unit='s', error_is_null=True) + got = strptime(arr, format='%d/%m/%Y', unit='s', error_is_null=True) assert got == expected with pytest.raises(pa.ArrowInvalid, match="Failed to parse string: '5/1/2020'"): - pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=False) + strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=False) with pytest.raises(pa.ArrowInvalid, match="Failed to parse string: '5/1/2020'"): - pc.strptime(arr, format='%Y-%m-%d', unit='s') + strptime(arr, format='%Y-%m-%d', unit='s') - got = pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=True) + got = strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=True) assert got == pa.array([None, None, None], type=pa.timestamp('s')) @@ -2290,7 +2325,7 @@ def test_strftime(): tsa = pa.array(ts, type=pa.timestamp(unit, timezone)) for fmt in formats: options = pc.StrftimeOptions(fmt) - result = pc.strftime(tsa, options=options) + result = strftime(tsa, options=options) # cast to the same type as result to ignore string vs large_string expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) @@ -2299,34 +2334,34 @@ def test_strftime(): # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) - result = pc.strftime(tsa, options=pc.StrftimeOptions()) + result = strftime(tsa, options=pc.StrftimeOptions()) expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Default format plus timezone tsa = pa.array(ts, type=pa.timestamp("s", timezone)) - result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + result = strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow 
for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%S") - result = pc.strftime(tsa, options=options) + result = strftime(tsa, options=options) expected = pa.array(ts.strftime("%S")).cast(result.type) assert result.equals(expected) # Pandas %S.%f is equivalent to %S in arrow for unit="us" tsa = pa.array(ts, type=pa.timestamp("us", timezone)) options = pc.StrftimeOptions("%S") - result = pc.strftime(tsa, options=options) + result = strftime(tsa, options=options) expected = pa.array(ts.strftime("%S.%f")).cast(result.type) assert result.equals(expected) # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions(fmt, locale="C") - result = pc.strftime(tsa, options=options) + result = strftime(tsa, options=options) expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) @@ -2334,19 +2369,19 @@ def test_strftime(): fmt = "%Y-%m-%dT%H:%M:%S" ts = pd.to_datetime(times) tsa = pa.array(ts, type=pa.timestamp("s")) - result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) + result = strftime(tsa, options=pc.StrftimeOptions(fmt)) expected = pa.array(ts.strftime(fmt)).cast(result.type) # Positional format - assert pc.strftime(tsa, fmt) == result + assert strftime(tsa, fmt) == result assert result.equals(expected) with pytest.raises(pa.ArrowInvalid, match="Timezone not present, cannot convert to string"): - pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) with pytest.raises(pa.ArrowInvalid, match="Timezone not present, cannot convert to string"): - pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) + strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) def _check_datetime_components(timestamps, timezone=None): @@ -2394,42 +2429,42 @@ def _check_datetime_components(timestamps, timezone=None): microsecond = ts.dt.microsecond.astype("int64") nanosecond = ts.dt.nanosecond.astype("int64") - assert pc.year(tsa).equals(pa.array(year)) - assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) - assert pc.month(tsa).equals(pa.array(month)) - assert pc.day(tsa).equals(pa.array(day)) - assert pc.day_of_week(tsa).equals(pa.array(dayofweek)) - assert pc.day_of_year(tsa).equals(pa.array(dayofyear)) - assert pc.iso_year(tsa).equals(pa.array(iso_year)) - assert pc.iso_week(tsa).equals(pa.array(iso_week)) - assert pc.iso_calendar(tsa).equals(iso_calendar) - assert pc.quarter(tsa).equals(pa.array(quarter)) - assert pc.hour(tsa).equals(pa.array(hour)) - assert pc.minute(tsa).equals(pa.array(minute)) - assert pc.second(tsa).equals(pa.array(second)) - assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) - assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) - assert pc.nanosecond(tsa).equals(pa.array(nanosecond)) - assert pc.subsecond(tsa).equals(pa.array(subseconds)) - assert pc.local_timestamp(tsa).equals(pa.array(ts.dt.tz_localize(None))) + assert pc_year(tsa).equals(pa.array(year)) + assert pc_is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) + assert pc_month(tsa).equals(pa.array(month)) + assert pc_day(tsa).equals(pa.array(day)) + assert pc_day_of_week(tsa).equals(pa.array(dayofweek)) + assert pc_day_of_year(tsa).equals(pa.array(dayofyear)) + assert pc_iso_year(tsa).equals(pa.array(iso_year)) + assert pc_iso_week(tsa).equals(pa.array(iso_week)) + assert pc_iso_calendar(tsa).equals(iso_calendar) + assert pc_quarter(tsa).equals(pa.array(quarter)) + assert 
pc_hour(tsa).equals(pa.array(hour)) + assert pc_minute(tsa).equals(pa.array(minute)) + assert pc_second(tsa).equals(pa.array(second)) + assert pc_millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) + assert pc_microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) + assert pc_nanosecond(tsa).equals(pa.array(nanosecond)) + assert pc_subsecond(tsa).equals(pa.array(subseconds)) + assert pc_local_timestamp(tsa).equals(pa.array(ts.dt.tz_localize(None))) if ts.dt.tz: if ts.dt.tz is datetime.timezone.utc: # datetime with utc returns None for dst() - is_dst = [False] * len(ts) + arr_is_dst = [False] * len(ts) else: - is_dst = ts.apply(lambda x: x.dst().seconds > 0) - assert pc.is_dst(tsa).equals(pa.array(is_dst)) + arr_is_dst = ts.apply(lambda x: x.dst().seconds > 0) + assert pc_is_dst(tsa).equals(pa.array(arr_is_dst)) day_of_week_options = pc.DayOfWeekOptions( count_from_zero=False, week_start=1) - assert pc.day_of_week(tsa, options=day_of_week_options).equals( + assert pc_day_of_week(tsa, options=day_of_week_options).equals( pa.array(dayofweek + 1)) week_options = pc.WeekOptions( week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False) - assert pc.week(tsa, options=week_options).equals(pa.array(iso_week)) + assert pc_week(tsa, options=week_options).equals(pa.array(iso_week)) @pytest.mark.pandas @@ -2468,7 +2503,7 @@ def test_iso_calendar_longer_array(unit): # https://github.com/apache/arrow/issues/38655 # ensure correct result for array length > 32 arr = pa.array([datetime.datetime(2022, 1, 2, 9)]*50, pa.timestamp(unit)) - result = pc.iso_calendar(arr) + result = pc_iso_calendar(arr) expected = pa.StructArray.from_arrays( [[2021]*50, [52]*50, [7]*50], names=['iso_year', 'iso_week', 'iso_day_of_week'] @@ -2507,18 +2542,18 @@ def test_assume_timezone(): options = pc.AssumeTimezoneOptions(timezone) ta = pa.array(timestamps, type=ts_type) expected = timestamps.tz_localize(timezone) - result = pc.assume_timezone(ta, options=options) + result = pc_assume_timezone(ta, options=options) assert result.equals(pa.array(expected)) - result = pc.assume_timezone(ta, timezone) # Positional option + result = pc_assume_timezone(ta, timezone) # Positional option assert result.equals(pa.array(expected)) ta_zoned = pa.array(timestamps, type=pa.timestamp("ns", timezone)) with pytest.raises(pa.ArrowInvalid, match="already have a timezone:"): - pc.assume_timezone(ta_zoned, options=options) + pc_assume_timezone(ta_zoned, options=options) invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss") with pytest.raises(ValueError, match="not found in timezone database"): - pc.assume_timezone(ta, options=invalid_options) + pc_assume_timezone(ta, options=invalid_options) timezone = "Europe/Brussels" @@ -2531,18 +2566,18 @@ def test_assume_timezone(): with pytest.raises(ValueError, match="Timestamp doesn't exist in " f"timezone '{timezone}'"): - pc.assume_timezone(nonexistent_array, + pc_assume_timezone(nonexistent_array, options=options_nonexistent_raise) expected = pa.array(nonexistent.tz_localize( timezone, nonexistent="shift_forward")) - result = pc.assume_timezone( + result = pc_assume_timezone( nonexistent_array, options=options_nonexistent_latest) expected.equals(result) expected = pa.array(nonexistent.tz_localize( timezone, nonexistent="shift_backward")) - result = pc.assume_timezone( + result = pc_assume_timezone( nonexistent_array, options=options_nonexistent_earliest) expected.equals(result) @@ -2555,16 +2590,16 @@ def test_assume_timezone(): with pytest.raises(ValueError, 
match="Timestamp is ambiguous in " f"timezone '{timezone}'"): - pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) + pc_assume_timezone(ambiguous_array, options=options_ambiguous_raise) expected = ambiguous.tz_localize(timezone, ambiguous=np.array([True, True, True])) - result = pc.assume_timezone( + result = pc_assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) expected = ambiguous.tz_localize( timezone, ambiguous=np.array([False, False, False])) - result = pc.assume_timezone( + result = pc_assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) @@ -2593,15 +2628,15 @@ def _check_temporal_rounding(ts, values, unit): frequency = str(value) + unit_shorthand[unit] options = pc.RoundTemporalOptions(value, unit) - result = pc.ceil_temporal(ta, options=options).to_pandas() + result = ceil_temporal(ta, options=options).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = pc.floor_temporal(ta, options=options).to_pandas() + result = floor_temporal(ta, options=options).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = pc.round_temporal(ta, options=options).to_pandas() + result = round_temporal(ta, options=options).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2614,29 +2649,29 @@ def _check_temporal_rounding(ts, values, unit): origin = ts.dt.floor(greater_unit[unit]) if ta.type.tz is None: - result = pc.ceil_temporal(ta, options=options).to_pandas() + result = ceil_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.ceil(frequency) + origin np.testing.assert_array_equal(result, expected) - result = pc.floor_temporal(ta, options=options).to_pandas() + result = floor_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.floor(frequency) + origin np.testing.assert_array_equal(result, expected) - result = pc.round_temporal(ta, options=options).to_pandas() + result = round_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.round(frequency) + origin np.testing.assert_array_equal(result, expected) # Check RoundTemporalOptions partial defaults if unit == "day": - result = pc.ceil_temporal(ta, multiple=value).to_pandas() + result = ceil_temporal(ta, multiple=value).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = pc.floor_temporal(ta, multiple=value).to_pandas() + result = floor_temporal(ta, multiple=value).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = pc.round_temporal(ta, multiple=value).to_pandas() + result = round_temporal(ta, multiple=value).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2647,7 +2682,7 @@ def _check_temporal_rounding(ts, values, unit): if ta.type.tz is None: options = pc.RoundTemporalOptions( value, unit, ceil_is_strictly_greater=True) - result = pc.ceil_temporal(ta, options=options) + result = ceil_temporal(ta, options=options) expected = ts.dt.ceil(frequency) expected = np.where( @@ -2660,15 +2695,15 @@ def _check_temporal_rounding(ts, values, unit): if unit == "day": frequency = "1D" - result = pc.ceil_temporal(ta).to_pandas() + result = ceil_temporal(ta).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = pc.floor_temporal(ta).to_pandas() + result 
= floor_temporal(ta).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = pc.round_temporal(ta).to_pandas() + result = round_temporal(ta).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2706,15 +2741,15 @@ def test_round_temporal(unit): def test_count(): arr = pa.array([1, 2, 3, None, None]) - assert pc.count(arr).as_py() == 3 - assert pc.count(arr, mode='only_valid').as_py() == 3 - assert pc.count(arr, mode='only_null').as_py() == 2 - assert pc.count(arr, mode='all').as_py() == 5 - assert pc.count(arr, 'all').as_py() == 5 + assert count(arr).as_py() == 3 + assert count(arr, mode='only_valid').as_py() == 3 + assert count(arr, mode='only_null').as_py() == 2 + assert count(arr, mode='all').as_py() == 5 + assert count(arr, 'all').as_py() == 5 with pytest.raises(ValueError, match='"something else" is not a valid count mode'): - pc.count(arr, 'something else') + count(arr, 'something else') def test_index(): @@ -2756,15 +2791,15 @@ def test_partition_nth(): data = list(range(100, 140)) random.shuffle(data) pivot = 10 - indices = pc.partition_nth_indices(data, pivot=pivot) + indices = partition_nth_indices(data, pivot=pivot) check_partition_nth(data, indices, pivot, "at_end") # Positional pivot argument - assert pc.partition_nth_indices(data, pivot) == indices + assert partition_nth_indices(data, pivot) == indices with pytest.raises( ValueError, match="'partition_nth_indices' cannot be called without options"): - pc.partition_nth_indices(data) + partition_nth_indices(data) def test_partition_nth_null_placement(): @@ -2773,14 +2808,14 @@ def test_partition_nth_null_placement(): for pivot in (0, 7, 13, 19): for null_placement in ("at_start", "at_end"): - indices = pc.partition_nth_indices(data, pivot=pivot, - null_placement=null_placement) + indices = partition_nth_indices(data, pivot=pivot, + null_placement=null_placement) check_partition_nth(data, indices, pivot, null_placement) def test_select_k_array(): def validate_select_k(select_k_indices, arr, order, stable_sort=False): - sorted_indices = pc.sort_indices(arr, sort_keys=[("dummy", order)]) + sorted_indices = sort_indices(arr, sort_keys=[("dummy", order)]) head_k_indices = sorted_indices.slice(0, len(select_k_indices)) if stable_sort: assert select_k_indices == head_k_indices @@ -2792,7 +2827,7 @@ def validate_select_k(select_k_indices, arr, order, stable_sort=False): arr = pa.array([1, 2, None, 0]) for k in [0, 2, 4]: for order in ["descending", "ascending"]: - result = pc.select_k_unstable( + result = select_k_unstable( arr, k=k, sort_keys=[("dummy", order)]) validate_select_k(result, arr, order) @@ -2802,26 +2837,26 @@ def validate_select_k(select_k_indices, arr, order, stable_sort=False): result = pc.bottom_k_unstable(arr, k=k) validate_select_k(result, arr, "ascending") - result = pc.select_k_unstable( + result = select_k_unstable( arr, options=pc.SelectKOptions( k=2, sort_keys=[("dummy", "descending")]) ) validate_select_k(result, arr, "descending") - result = pc.select_k_unstable( + result = select_k_unstable( arr, options=pc.SelectKOptions(k=2, sort_keys=[("dummy", "ascending")]) ) validate_select_k(result, arr, "ascending") # Position options - assert pc.select_k_unstable(arr, 2, - sort_keys=[("dummy", "ascending")]) == result - assert pc.select_k_unstable(arr, 2, [("dummy", "ascending")]) == result + assert select_k_unstable(arr, 2, + sort_keys=[("dummy", "ascending")]) == result + assert select_k_unstable(arr, 2, [("dummy", 
"ascending")]) == result def test_select_k_table(): def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): - sorted_indices = pc.sort_indices(tbl, sort_keys=sort_keys) + sorted_indices = sort_indices(tbl, sort_keys=sort_keys) head_k_indices = sorted_indices.slice(0, len(select_k_indices)) if stable_sort: assert select_k_indices == head_k_indices @@ -2832,11 +2867,11 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): table = pa.table({"a": [1, 2, 0], "b": [1, 0, 1]}) for k in [0, 2, 4]: - result = pc.select_k_unstable( + result = select_k_unstable( table, k=k, sort_keys=[("a", "ascending")]) validate_select_k(result, table, sort_keys=[("a", "ascending")]) - result = pc.select_k_unstable( + result = select_k_unstable( table, k=k, sort_keys=[(pc.field("a"), "ascending"), ("b", "ascending")]) validate_select_k( result, table, sort_keys=[("a", "ascending"), ("b", "ascending")]) @@ -2851,65 +2886,65 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises( ValueError, match="'select_k_unstable' cannot be called without options"): - pc.select_k_unstable(table) + select_k_unstable(table) with pytest.raises(ValueError, match="select_k_unstable requires a nonnegative `k`"): - pc.select_k_unstable(table, k=-1, sort_keys=[("a", "ascending")]) + select_k_unstable(table, k=-1, sort_keys=[("a", "ascending")]) with pytest.raises(ValueError, match="select_k_unstable requires a " "non-empty `sort_keys`"): - pc.select_k_unstable(table, k=2, sort_keys=[]) + select_k_unstable(table, k=2, sort_keys=[]) with pytest.raises(ValueError, match="not a valid sort order"): - pc.select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")]) + select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")]) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) + select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) def test_array_sort_indices(): arr = pa.array([1, 2, None, 0]) - result = pc.array_sort_indices(arr) + result = array_sort_indices(arr) assert result.to_pylist() == [3, 0, 1, 2] - result = pc.array_sort_indices(arr, order="ascending") + result = array_sort_indices(arr, order="ascending") assert result.to_pylist() == [3, 0, 1, 2] - result = pc.array_sort_indices(arr, order="descending") + result = array_sort_indices(arr, order="descending") assert result.to_pylist() == [1, 0, 3, 2] - result = pc.array_sort_indices(arr, order="descending", - null_placement="at_start") + result = array_sort_indices(arr, order="descending", + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] - result = pc.array_sort_indices(arr, "descending", - null_placement="at_start") + result = array_sort_indices(arr, "descending", + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="not a valid sort order"): - pc.array_sort_indices(arr, order="nonscending") + array_sort_indices(arr, order="nonscending") def test_sort_indices_array(): arr = pa.array([1, 2, None, 0]) - result = pc.sort_indices(arr) + result = sort_indices(arr) assert result.to_pylist() == [3, 0, 1, 2] - result = pc.sort_indices(arr, sort_keys=[("dummy", "ascending")]) + result = sort_indices(arr, sort_keys=[("dummy", "ascending")]) assert result.to_pylist() == [3, 0, 1, 2] - result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")]) + result = sort_indices(arr, sort_keys=[("dummy", 
"descending")]) assert result.to_pylist() == [1, 0, 3, 2] - result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")], - null_placement="at_start") + result = sort_indices(arr, sort_keys=[("dummy", "descending")], + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] # Positional `sort_keys` - result = pc.sort_indices(arr, [("dummy", "descending")], - null_placement="at_start") + result = sort_indices(arr, [("dummy", "descending")], + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] # Using SortOptions - result = pc.sort_indices( + result = sort_indices( arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")]) ) assert result.to_pylist() == [1, 0, 3, 2] - result = pc.sort_indices( + result = sort_indices( arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")], null_placement="at_start") ) @@ -2919,134 +2954,134 @@ def test_sort_indices_array(): def test_sort_indices_table(): table = pa.table({"a": [1, 1, None, 0], "b": [1, 0, 0, 1]}) - result = pc.sort_indices(table, sort_keys=[("a", "ascending")]) + result = sort_indices(table, sort_keys=[("a", "ascending")]) assert result.to_pylist() == [3, 0, 1, 2] - result = pc.sort_indices(table, sort_keys=[(pc.field("a"), "ascending")], - null_placement="at_start") + result = sort_indices(table, sort_keys=[(pc.field("a"), "ascending")], + null_placement="at_start") assert result.to_pylist() == [2, 3, 0, 1] - result = pc.sort_indices( + result = sort_indices( table, sort_keys=[("a", "descending"), ("b", "ascending")] ) assert result.to_pylist() == [1, 0, 3, 2] - result = pc.sort_indices( + result = sort_indices( table, sort_keys=[("a", "descending"), ("b", "ascending")], null_placement="at_start" ) assert result.to_pylist() == [2, 1, 0, 3] # Positional `sort_keys` - result = pc.sort_indices( + result = sort_indices( table, [("a", "descending"), ("b", "ascending")], null_placement="at_start" ) assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="Must specify one or more sort keys"): - pc.sort_indices(table) + sort_indices(table) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.sort_indices(table, sort_keys=[("unknown", "ascending")]) + sort_indices(table, sort_keys=[("unknown", "ascending")]) with pytest.raises(ValueError, match="not a valid sort order"): - pc.sort_indices(table, sort_keys=[("a", "nonscending")]) + sort_indices(table, sort_keys=[("a", "nonscending")]) def test_is_in(): arr = pa.array([1, 2, None, 1, 2, 3]) - result = pc.is_in(arr, value_set=pa.array([1, 3, None])) + result = is_in(arr, value_set=pa.array([1, 3, None])) assert result.to_pylist() == [True, False, True, True, False, True] - result = pc.is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True) + result = is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True) assert result.to_pylist() == [True, False, False, True, False, True] - result = pc.is_in(arr, value_set=pa.array([1, 3])) + result = is_in(arr, value_set=pa.array([1, 3])) assert result.to_pylist() == [True, False, False, True, False, True] - result = pc.is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) + result = is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [True, False, False, True, False, True] def test_index_in(): arr = pa.array([1, 2, None, 1, 2, 3]) - result = pc.index_in(arr, value_set=pa.array([1, 3, None])) + result = index_in(arr, value_set=pa.array([1, 3, None])) assert result.to_pylist() == [0, None, 2, 0, None, 1] - 
result = pc.index_in(arr, value_set=pa.array([1, 3, None]), - skip_nulls=True) + result = index_in(arr, value_set=pa.array([1, 3, None]), + skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] - result = pc.index_in(arr, value_set=pa.array([1, 3])) + result = index_in(arr, value_set=pa.array([1, 3])) assert result.to_pylist() == [0, None, None, 0, None, 1] - result = pc.index_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) + result = index_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] # Positional value_set - result = pc.index_in(arr, pa.array([1, 3]), skip_nulls=True) + result = index_in(arr, pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] def test_quantile(): arr = pa.array([1, 2, 3, 4]) - result = pc.quantile(arr) + result = quantile(arr) assert result.to_pylist() == [2.5] - result = pc.quantile(arr, interpolation='lower') + result = quantile(arr, interpolation='lower') assert result.to_pylist() == [2] - result = pc.quantile(arr, interpolation='higher') + result = quantile(arr, interpolation='higher') assert result.to_pylist() == [3] - result = pc.quantile(arr, interpolation='nearest') + result = quantile(arr, interpolation='nearest') assert result.to_pylist() == [3] - result = pc.quantile(arr, interpolation='midpoint') + result = quantile(arr, interpolation='midpoint') assert result.to_pylist() == [2.5] - result = pc.quantile(arr, interpolation='linear') + result = quantile(arr, interpolation='linear') assert result.to_pylist() == [2.5] arr = pa.array([1, 2]) - result = pc.quantile(arr, q=[0.25, 0.5, 0.75]) + result = quantile(arr, q=[0.25, 0.5, 0.75]) assert result.to_pylist() == [1.25, 1.5, 1.75] - result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='lower') + result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='lower') assert result.to_pylist() == [1, 1, 1] - result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='higher') + result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='higher') assert result.to_pylist() == [2, 2, 2] - result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='midpoint') + result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='midpoint') assert result.to_pylist() == [1.5, 1.5, 1.5] - result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='nearest') + result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='nearest') assert result.to_pylist() == [1, 1, 2] - result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='linear') + result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='linear') assert result.to_pylist() == [1.25, 1.5, 1.75] # Positional `q` - result = pc.quantile(arr, [0.25, 0.5, 0.75], interpolation='linear') + result = quantile(arr, [0.25, 0.5, 0.75], interpolation='linear') assert result.to_pylist() == [1.25, 1.5, 1.75] with pytest.raises(ValueError, match="Quantile must be between 0 and 1"): - pc.quantile(arr, q=1.1) + quantile(arr, q=1.1) with pytest.raises(ValueError, match="not a valid quantile interpolation"): - pc.quantile(arr, interpolation='zzz') + quantile(arr, interpolation='zzz') def test_tdigest(): arr = pa.array([1, 2, 3, 4]) - result = pc.tdigest(arr) + result = tdigest(arr) assert result.to_pylist() == [2.5] arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])]) - result = pc.tdigest(arr) + result = tdigest(arr) assert result.to_pylist() == [2.5] arr = pa.array([1, 2, 3, 4]) - result = pc.tdigest(arr, q=[0, 0.5, 1]) + result = tdigest(arr, q=[0, 
0.5, 1]) assert result.to_pylist() == [1, 2.5, 4] arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])]) - result = pc.tdigest(arr, [0, 0.5, 1]) # positional `q` + result = tdigest(arr, [0, 0.5, 1]) # positional `q` assert result.to_pylist() == [1, 2.5, 4] @@ -3062,32 +3097,32 @@ def test_min_max_element_wise(): arr2 = pa.array([3, 1, 2]) arr3 = pa.array([2, 3, None]) - result = pc.max_element_wise(arr1, arr2) + result = max_element_wise(arr1, arr2) assert result == pa.array([3, 2, 3]) - result = pc.min_element_wise(arr1, arr2) + result = min_element_wise(arr1, arr2) assert result == pa.array([1, 1, 2]) - result = pc.max_element_wise(arr1, arr2, arr3) + result = max_element_wise(arr1, arr2, arr3) assert result == pa.array([3, 3, 3]) - result = pc.min_element_wise(arr1, arr2, arr3) + result = min_element_wise(arr1, arr2, arr3) assert result == pa.array([1, 1, 2]) # with specifying the option - result = pc.max_element_wise(arr1, arr3, skip_nulls=True) + result = max_element_wise(arr1, arr3, skip_nulls=True) assert result == pa.array([2, 3, 3]) - result = pc.min_element_wise(arr1, arr3, skip_nulls=True) + result = min_element_wise(arr1, arr3, skip_nulls=True) assert result == pa.array([1, 2, 3]) - result = pc.max_element_wise( + result = max_element_wise( arr1, arr3, options=pc.ElementWiseAggregateOptions()) assert result == pa.array([2, 3, 3]) - result = pc.min_element_wise( + result = min_element_wise( arr1, arr3, options=pc.ElementWiseAggregateOptions()) assert result == pa.array([1, 2, 3]) # not skipping nulls - result = pc.max_element_wise(arr1, arr3, skip_nulls=False) + result = max_element_wise(arr1, arr3, skip_nulls=False) assert result == pa.array([2, 3, None]) - result = pc.min_element_wise(arr1, arr3, skip_nulls=False) + result = min_element_wise(arr1, arr3, skip_nulls=False) assert result == pa.array([1, 2, None]) @@ -3113,9 +3148,9 @@ def test_cumulative_sum(start, skip_nulls): if skip_nulls else pa.chunked_array([[0, None, None, None]]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing - expected = pc.add(expected_arrays[i], strt if strt is not None + expected = pc_add(expected_arrays[i], strt if strt is not None else 0) assert result.equals(expected) @@ -3134,16 +3169,16 @@ def test_cumulative_sum(start, skip_nulls): if skip_nulls else np.array([1, np.nan, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing - expected = pc.add(expected_arrays[i], strt if strt is not None + expected = pc_add(expected_arrays[i], strt if strt is not None else 0) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_sum([1, 2, 3], start=strt) + cumulative_sum([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3168,10 +3203,10 @@ def test_cumulative_prod(start, skip_nulls): if skip_nulls else pa.chunked_array([[1, None, None, None]]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) # Multiply `start` offset to expected array before comparing - expected 
= pc.multiply(expected_arrays[i], strt if strt is not None - else 1) + expected = multiply(expected_arrays[i], strt if strt is not None + else 1) assert result.equals(expected) starts = [None, start, pa.scalar(start, type=pa.float32()), @@ -3189,16 +3224,16 @@ def test_cumulative_prod(start, skip_nulls): if skip_nulls else np.array([1, np.nan, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) # Multiply `start` offset to expected array before comparing - expected = pc.multiply(expected_arrays[i], strt if strt is not None - else 1) + expected = multiply(expected_arrays[i], strt if strt is not None + else 1) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_prod([1, 2, 3], start=strt) + cumulative_prod([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3224,9 +3259,9 @@ def test_cumulative_max(start, skip_nulls): pa.chunked_array([[2, 2, None, None, None, None]]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_max(arr, start=strt, skip_nulls=skip_nulls) # Max `start` offset with expected array before comparing - expected = pc.max_element_wise( + expected = max_element_wise( expected_arrays[i], strt if strt is not None else int(-1e9), skip_nulls=False) assert result.equals(expected) @@ -3246,9 +3281,9 @@ def test_cumulative_max(start, skip_nulls): if skip_nulls else np.array([2.5, 2.5, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_max(arr, start=strt, skip_nulls=skip_nulls) # Max `start` offset with expected array before comparing - expected = pc.max_element_wise( + expected = max_element_wise( expected_arrays[i], strt if strt is not None else -1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( @@ -3256,7 +3291,7 @@ def test_cumulative_max(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) + cumulative_max([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3282,9 +3317,9 @@ def test_cumulative_min(start, skip_nulls): pa.chunked_array([[5, 5, None, None, None, None]]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_min(arr, start=strt, skip_nulls=skip_nulls) # Min `start` offset with expected array before comparing - expected = pc.min_element_wise( + expected = min_element_wise( expected_arrays[i], strt if strt is not None else int(1e9), skip_nulls=False) assert result.equals(expected) @@ -3304,9 +3339,9 @@ def test_cumulative_min(start, skip_nulls): if skip_nulls else np.array([5.5, 5.5, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + result = cumulative_min(arr, start=strt, skip_nulls=skip_nulls) # Min `start` offset with expected array before comparing - expected = pc.min_element_wise( + expected = min_element_wise( expected_arrays[i], strt if strt is not None else 1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( @@ -3314,26 +3349,26 @@ def test_cumulative_min(start, skip_nulls): for strt in ['a', 
pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) + cumulative_max([1, 2, 3], start=strt) def test_make_struct(): - assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'} + assert make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'} - assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == { + assert make_struct(1, 'a', field_names=['i', 's']).as_py() == { 'i': 1, 's': 'a'} - assert pc.make_struct([1, 2, 3], - "a b c".split()) == pa.StructArray.from_arrays([ - [1, 2, 3], - "a b c".split()], names='0 1'.split()) + assert make_struct([1, 2, 3], + "a b c".split()) == pa.StructArray.from_arrays([ + [1, 2, 3], + "a b c".split()], names='0 1'.split()) with pytest.raises(ValueError, match="Array arguments must all be the same length"): - pc.make_struct([1, 2, 3, 4], "a b c".split()) + make_struct([1, 2, 3, 4], "a b c".split()) with pytest.raises(ValueError, match="0 arguments but 2 field names"): - pc.make_struct(field_names=['one', 'two']) + make_struct(field_names=['one', 'two']) def test_map_lookup(): @@ -3345,12 +3380,12 @@ def test_map_lookup(): result_all = pa.array([[1], None, None, [5, 7], None], type=pa.list_(pa.int32())) - assert pc.map_lookup(arr, 'one', 'first') == result_first - assert pc.map_lookup(arr, pa.scalar( + assert map_lookup(arr, 'one', 'first') == result_first + assert map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'first') == result_first - assert pc.map_lookup(arr, pa.scalar( + assert map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'last') == result_last - assert pc.map_lookup(arr, pa.scalar( + assert map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'all') == result_all @@ -3360,42 +3395,42 @@ def test_struct_fields_options(): c = pa.StructArray.from_arrays([a, b], ["a", "b"]) arr = pa.StructArray.from_arrays([a, c], ["a", "c"]) - assert pc.struct_field(arr, '.c.b') == b - assert pc.struct_field(arr, b'.c.b') == b - assert pc.struct_field(arr, ['c', 'b']) == b - assert pc.struct_field(arr, [1, 'b']) == b - assert pc.struct_field(arr, (b'c', 'b')) == b - assert pc.struct_field(arr, pc.field(('c', 'b'))) == b + assert struct_field(arr, '.c.b') == b + assert struct_field(arr, b'.c.b') == b + assert struct_field(arr, ['c', 'b']) == b + assert struct_field(arr, [1, 'b']) == b + assert struct_field(arr, (b'c', 'b')) == b + assert struct_field(arr, pc.field(('c', 'b'))) == b - assert pc.struct_field(arr, '.a') == a - assert pc.struct_field(arr, ['a']) == a - assert pc.struct_field(arr, 'a') == a - assert pc.struct_field(arr, pc.field(('a',))) == a + assert struct_field(arr, '.a') == a + assert struct_field(arr, ['a']) == a + assert struct_field(arr, 'a') == a + assert struct_field(arr, pc.field(('a',))) == a - assert pc.struct_field(arr, indices=[1, 1]) == b - assert pc.struct_field(arr, (1, 1)) == b - assert pc.struct_field(arr, [0]) == a - assert pc.struct_field(arr, []) == arr + assert struct_field(arr, indices=[1, 1]) == b + assert struct_field(arr, (1, 1)) == b + assert struct_field(arr, [0]) == a + assert struct_field(arr, []) == arr with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - pc.struct_field(arr, 'foo') + struct_field(arr, 'foo') with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - pc.struct_field(arr, '.c.foo') + struct_field(arr, '.c.foo') # drill into a non-struct array and continue to ask for a field with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - pc.struct_field(arr, '.a.foo') + struct_field(arr, '.a.foo') # TODO: 
https://issues.apache.org/jira/browse/ARROW-14853 - # assert pc.struct_field(arr) == arr + # assert struct_field(arr) == arr def test_case_when(): - assert pc.case_when(pc.make_struct([True, False, None], - [False, True, None]), - [1, 2, 3], - [11, 12, 13]) == pa.array([1, 12, None]) + assert case_when(make_struct([True, False, None], + [False, True, None]), + [1, 2, 3], + [11, 12, 13]) == pa.array([1, 12, None]) def test_list_element(): @@ -3406,12 +3441,12 @@ def test_list_element(): lists = pa.array([l1, l2], list_type) index = 1 - result = pa.compute.list_element(lists, index) + result = list_element(lists, index) expected = pa.array([None, {'a': 0.52, 'b': 3}], element_type) assert result.equals(expected) index = 4 - result = pa.compute.list_element(lists, index) + result = list_element(lists, index) expected = pa.array([{'a': 5.6, 'b': 6}, {'a': .6, 'b': 8}], element_type) assert result.equals(expected) @@ -3419,28 +3454,28 @@ def test_list_element(): def test_count_distinct(): samples = [datetime.datetime(year=y, month=1, day=1) for y in range(1992, 2092)] arr = pa.array(samples, pa.timestamp("ns")) - assert pc.count_distinct(arr) == pa.scalar(len(samples), type=pa.int64()) + assert count_distinct(arr) == pa.scalar(len(samples), type=pa.int64()) def test_count_distinct_options(): arr = pa.array([1, 2, 3, None, None]) - assert pc.count_distinct(arr).as_py() == 3 - assert pc.count_distinct(arr, mode='only_valid').as_py() == 3 - assert pc.count_distinct(arr, mode='only_null').as_py() == 1 - assert pc.count_distinct(arr, mode='all').as_py() == 4 - assert pc.count_distinct(arr, 'all').as_py() == 4 + assert count_distinct(arr).as_py() == 3 + assert count_distinct(arr, mode='only_valid').as_py() == 3 + assert count_distinct(arr, mode='only_null').as_py() == 1 + assert count_distinct(arr, mode='all').as_py() == 4 + assert count_distinct(arr, 'all').as_py() == 4 def test_utf8_normalize(): arr = pa.array(["01²3"]) - assert pc.utf8_normalize(arr, form="NFC") == arr - assert pc.utf8_normalize(arr, form="NFKC") == pa.array(["0123"]) - assert pc.utf8_normalize(arr, "NFD") == arr - assert pc.utf8_normalize(arr, "NFKD") == pa.array(["0123"]) + assert utf8_normalize(arr, form="NFC") == arr + assert utf8_normalize(arr, form="NFKC") == pa.array(["0123"]) + assert utf8_normalize(arr, "NFD") == arr + assert utf8_normalize(arr, "NFKD") == pa.array(["0123"]) with pytest.raises( ValueError, match='"NFZ" is not a valid Unicode normalization form'): - pc.utf8_normalize(arr, form="NFZ") + utf8_normalize(arr, form="NFZ") def test_random(): @@ -3482,7 +3517,7 @@ def test_rank_options_tiebreaker(tiebreaker, expected_values): rank_options = pc.RankOptions(sort_keys="ascending", null_placement="at_end", tiebreaker=tiebreaker) - result = pc.rank(arr, options=rank_options) + result = rank(arr, options=rank_options) expected = pa.array(expected_values, type=pa.uint64()) assert result.equals(expected) @@ -3492,24 +3527,24 @@ def test_rank_options(): expected = pa.array([3, 1, 4, 6, 5, 7, 2], type=pa.uint64()) # Ensure rank can be called without specifying options - result = pc.rank(arr) + result = rank(arr) assert result.equals(expected) # Ensure default RankOptions - result = pc.rank(arr, options=pc.RankOptions()) + result = rank(arr, options=pc.RankOptions()) assert result.equals(expected) # Ensure sort_keys tuple usage - result = pc.rank(arr, options=pc.RankOptions( + result = rank(arr, options=pc.RankOptions( sort_keys=[("b", "ascending")]) ) assert result.equals(expected) - result = pc.rank(arr, 
null_placement="at_start") + result = rank(arr, null_placement="at_start") expected_at_start = pa.array([5, 3, 6, 1, 7, 2, 4], type=pa.uint64()) assert result.equals(expected_at_start) - result = pc.rank(arr, sort_keys="descending") + result = rank(arr, sort_keys="descending") expected_descending = pa.array([3, 4, 1, 6, 2, 7, 5], type=pa.uint64()) assert result.equals(expected_descending) @@ -3525,29 +3560,29 @@ def test_rank_quantile_options(): expected = pa.array([0.7, 0.1, 0.7, 0.3, 0.7], type=pa.float64()) # Ensure rank_quantile can be called without specifying options - result = pc.rank_quantile(arr) + result = rank_quantile(arr) assert result.equals(expected) # Ensure default RankOptions - result = pc.rank_quantile(arr, options=pc.RankQuantileOptions()) + result = rank_quantile(arr, options=pc.RankQuantileOptions()) assert result.equals(expected) # Ensure sort_keys tuple usage - result = pc.rank_quantile(arr, options=pc.RankQuantileOptions( + result = rank_quantile(arr, options=pc.RankQuantileOptions( sort_keys=[("b", "ascending")]) ) assert result.equals(expected) - result = pc.rank_quantile(arr, null_placement="at_start") + result = rank_quantile(arr, null_placement="at_start") expected_at_start = pa.array([0.3, 0.7, 0.3, 0.9, 0.3], type=pa.float64()) assert result.equals(expected_at_start) - result = pc.rank_quantile(arr, sort_keys="descending") + result = rank_quantile(arr, sort_keys="descending") expected_descending = pa.array([0.7, 0.3, 0.7, 0.1, 0.7], type=pa.float64()) assert result.equals(expected_descending) with pytest.raises(ValueError, match="not a valid sort order"): - pc.rank_quantile(arr, sort_keys="XXX") + rank_quantile(arr, sort_keys="XXX") def test_rank_normal_options(): @@ -3556,21 +3591,21 @@ def test_rank_normal_options(): expected = pytest.approx( [0.5244005127080407, -1.2815515655446004, 0.5244005127080407, -0.5244005127080409, 0.5244005127080407]) - result = pc.rank_normal(arr) + result = rank_normal(arr) assert result.to_pylist() == expected - result = pc.rank_normal(arr, null_placement="at_end", sort_keys="ascending") + result = rank_normal(arr, null_placement="at_end", sort_keys="ascending") assert result.to_pylist() == expected - result = pc.rank_normal(arr, options=pc.RankQuantileOptions()) + result = rank_normal(arr, options=pc.RankQuantileOptions()) assert result.to_pylist() == expected expected = pytest.approx( [-0.5244005127080409, 1.2815515655446004, -0.5244005127080409, 0.5244005127080407, -0.5244005127080409]) - result = pc.rank_normal(arr, null_placement="at_start", sort_keys="descending") + result = rank_normal(arr, null_placement="at_start", sort_keys="descending") assert result.to_pylist() == expected - result = pc.rank_normal(arr, - options=pc.RankQuantileOptions(null_placement="at_start", - sort_keys="descending")) + result = rank_normal(arr, + options=pc.RankQuantileOptions(null_placement="at_start", + sort_keys="descending")) assert result.to_pylist() == expected @@ -3598,17 +3633,17 @@ def create_sample_expressions(): # These expressions include at least one function call exprs_with_call = [a == b, a != b, a > b, c & j, c | j, ~c, d.is_valid(), - a + b, a - b, a * b, a / b, pc.negate(a), - pc.add(a, b), pc.subtract(a, b), pc.divide(a, b), - pc.multiply(a, b), pc.power(a, a), pc.sqrt(a), - pc.exp(b), pc.cos(b), pc.sin(b), pc.tan(b), - pc.acos(b), pc.atan(b), pc.asin(b), pc.atan2(b, b), - pc.sinh(a), pc.cosh(a), pc.tanh(a), - pc.asinh(a), pc.acosh(b), pc.atanh(k), - pc.abs(b), pc.sign(a), pc.bit_wise_not(a), - pc.bit_wise_and(a, a), 
pc.bit_wise_or(a, a), - pc.bit_wise_xor(a, a), pc.is_nan(b), pc.is_finite(b), - pc.coalesce(a, b), + a + b, a - b, a * b, a / b, negate(a), + pc_add(a, b), subtract(a, b), divide(a, b), + multiply(a, b), power(a, a), sqrt(a), + exp(b), cos(b), sin(b), tan(b), + acos(b), atan(b), asin(b), atan2(b, b), + sinh(a), cosh(a), tanh(a), + asinh(a), acosh(b), atanh(k), + pc_abs(b), sign(a), bit_wise_not(a), + bit_wise_and(a, a), bit_wise_or(a, a), + bit_wise_xor(a, a), is_nan(b), is_finite(b), + coalesce(a, b), a.cast(pa.int32(), safe=False)] # These expressions test out various reference styles and may include function @@ -3772,29 +3807,29 @@ def test_expression_call_function(): field = pc.field("field") # no options - assert str(pc.hour(field)) == "hour(field)" + assert str(hour(field)) == "hour(field)" # default options - assert str(pc.round(field)) == "round(field)" + assert str(pc_round(field)) == "round(field)" # specified options - assert str(pc.round(field, ndigits=1)) == \ + assert str(pc_round(field, ndigits=1)) == \ "round(field, {ndigits=1, round_mode=HALF_TO_EVEN})" # Will convert non-expression arguments if possible - assert str(pc.add(field, 1)) == "add(field, 1)" - assert str(pc.add(field, pa.scalar(1))) == "add(field, 1)" + assert str(pc_add(field, 1)) == "add(field, 1)" + assert str(pc_add(field, pa.scalar(1))) == "add(field, 1)" # Invalid pc.scalar input gives original error message msg = "only other expressions allowed as arguments" with pytest.raises(TypeError, match=msg): - pc.add(field, object) + pc_add(field, object) def test_cast_table_raises(): table = pa.table({'a': [1, 2]}) - with pytest.raises(pa.lib.ArrowTypeError): - pc.cast(table, pa.int64()) + with pytest.raises(lib.ArrowTypeError): + cast(table, pa.int64()) @pytest.mark.parametrize("start,stop,expected", ( @@ -3821,9 +3856,9 @@ def test_list_slice_output_fixed(start, stop, step, expected, value_type, msg = ("Unable to produce FixedSizeListArray from " "non-FixedSizeListArray without `stop` being set.") with pytest.raises(pa.ArrowInvalid, match=msg): - pc.list_slice(*args) + list_slice(*args) else: - result = pc.list_slice(*args) + result = list_slice(*args) pylist = result.cast(pa.list_(pa.int8(), result.type.list_size)).to_pylist() assert pylist == [e[::step] if e else e for e in expected] @@ -3854,8 +3889,8 @@ def test_list_slice_output_variable(start, stop, step, value_type, list_type): if list_type == "fixed": list_type = pa.list_ # non fixed output type - result = pc.list_slice(arr, start, stop, step, - return_fixed_size_list=False) + result = list_slice(arr, start, stop, step, + return_fixed_size_list=False) assert result.type == list_type(value_type()) pylist = result.cast(pa.list_(pa.int8())).to_pylist() @@ -3872,7 +3907,7 @@ def test_list_slice_output_variable(start, stop, step, value_type, list_type): lambda: pa.large_list(pa.field('col', pa.int8())))) def test_list_slice_field_names_retained(return_fixed_size, type): arr = pa.array([[1]], type()) - out = pc.list_slice(arr, 0, 1, return_fixed_size_list=return_fixed_size) + out = list_slice(arr, 0, 1, return_fixed_size_list=return_fixed_size) assert arr.type.field(0).name == out.type.field(0).name # Verify out type matches in type if return_fixed_size_list==None @@ -3884,27 +3919,27 @@ def test_list_slice_bad_parameters(): arr = pa.array([[1]], pa.list_(pa.int8(), 1)) msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)" with pytest.raises(pa.ArrowInvalid, match=msg): - pc.list_slice(arr, -1, 1) # negative start? 
+ list_slice(arr, -1, 1) # negative start? with pytest.raises(pa.ArrowInvalid, match=msg): - pc.list_slice(arr, 2, 1) # start > stop? + list_slice(arr, 2, 1) # start > stop? # TODO(ARROW-18281): start==stop -> empty lists with pytest.raises(pa.ArrowInvalid, match=msg): - pc.list_slice(arr, 0, 0) # start == stop? + list_slice(arr, 0, 0) # start == stop? # Step not >= 1 msg = "`step` must be >= 1, got: " with pytest.raises(pa.ArrowInvalid, match=msg + "0"): - pc.list_slice(arr, 0, 1, step=0) + list_slice(arr, 0, 1, step=0) with pytest.raises(pa.ArrowInvalid, match=msg + "-1"): - pc.list_slice(arr, 0, 1, step=-1) + list_slice(arr, 0, 1, step=-1) def check_run_end_encode_decode(value_type, run_end_encode_opts=None): values = [1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3] arr = pa.array(values, type=value_type) - encoded = pc.run_end_encode(arr, options=run_end_encode_opts) - decoded = pc.run_end_decode(encoded) + encoded = run_end_encode(arr, options=run_end_encode_opts) + decoded = run_end_decode(encoded) assert decoded.type == arr.type assert decoded.equals(arr) @@ -3941,65 +3976,65 @@ def test_run_end_encode(value_type, option): def test_pairwise_diff(): arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, 1, 1, None, None, 1]) - result = pa.compute.pairwise_diff(arr, period=1) + result = pairwise_diff(arr, period=1) assert result.equals(expected) arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, None, 2, None, 1, None]) - result = pa.compute.pairwise_diff(arr, period=2) + result = pairwise_diff(arr, period=2) assert result.equals(expected) # negative period arr = pa.array([1, 2, 3, None, 4, 5], type=pa.int8()) expected = pa.array([-1, -1, None, None, -1, None], type=pa.int8()) - result = pa.compute.pairwise_diff(arr, period=-1) + result = pairwise_diff(arr, period=-1) assert result.equals(expected) # wrap around overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) expected = pa.array([255, 255, None, None, 255, None], type=pa.uint8()) - result = pa.compute.pairwise_diff(arr, period=-1) + result = pairwise_diff(arr, period=-1) assert result.equals(expected) # fail on overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) with pytest.raises(pa.ArrowInvalid, match="overflow"): - pa.compute.pairwise_diff_checked(arr, period=-1) + pairwise_diff_checked(arr, period=-1) def test_pivot_wider(): key_names = ["width", "height"] - result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11]) + result = pivot_wider(["height", "width", "depth"], [10, None, 11]) assert result.as_py() == {} - result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names) + result = pivot_wider(["height", "width", "depth"], [10, None, 11], + key_names) assert result.as_py() == {"width": None, "height": 10} # check key order assert list(result.as_py()) == ["width", "height"] - result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names=key_names) + result = pivot_wider(["height", "width", "depth"], [10, None, 11], + key_names=key_names) assert result.as_py() == {"width": None, "height": 10} with pytest.raises(KeyError, match="Unexpected pivot key: depth"): - result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names=key_names, - unexpected_key_behavior="raise") + result = pivot_wider(["height", "width", "depth"], [10, None, 11], + key_names=key_names, + unexpected_key_behavior="raise") with pytest.raises(ValueError, match="Encountered more than one non-null value"): - result = 
pc.pivot_wider(["height", "width", "height"], [10, None, 11], - key_names=key_names) + result = pivot_wider(["height", "width", "height"], [10, None, 11], + key_names=key_names) def test_winsorize(): arr = pa.array([10, 4, 9, 8, 5, 3, 7, 2, 1, 6]) - result = pc.winsorize(arr, 0.1, 0.8) + result = winsorize(arr, 0.1, 0.8) assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6] - result = pc.winsorize( + result = winsorize( arr, options=pc.WinsorizeOptions(lower_limit=0.1, upper_limit=0.8)) assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6] diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 170f62a43bd..71c96835d2c 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1989,7 +1989,8 @@ def test_write_quoting_style(): buf = io.BytesIO() for write_options, res in [ (WriteOptions(quoting_style='needed'), b'"c1"\n","\n""""\n'), - (WriteOptions(quoting_style='none'), pa.lib.ArrowInvalid), + (WriteOptions(quoting_style='none'), pa.lib.ArrowInvalid), \ + # type: ignore[unresolved-attribute] ]: with CSVWriter(buf, t.schema, write_options=write_options) as writer: try: diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index d8298eec773..1ca5a9529e4 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -807,8 +807,9 @@ def test_create_table_with_device_buffers(): def other_process_for_test_IPC(handle_buffer, expected_arr): - other_context = pa.cuda.Context(0) - ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer) + other_context = pa.cuda.Context(0) # type: ignore[unresolved-attribute] + ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer) \ + # type: ignore[unresolved-attribute] ipc_buf = other_context.open_ipc_buffer(ipc_handle) ipc_buf.context.synchronize() buf = ipc_buf.copy_to_host() diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 6303b47bd44..344201ff4f9 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -41,6 +41,9 @@ import pyarrow.feather import pyarrow.fs as fs import pyarrow.json +from pyarrow import lib # type: ignore[unresolved-attribute] +from pyarrow.compute import (is_in, hour, days_between, sort_indices, unique) \ + # type: ignore[unresolved-attribute] from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, _configure_s3_limited_user, _filesystem_uri, @@ -53,24 +56,21 @@ try: import pyarrow.dataset as ds - from pyarrow.dataset import ( - ParquetFragmentScanOptions, ParquetReadOptions, ParquetFileFragment \ - # type: ignore[possibly-unbound-attribute] - ) + from pyarrow.dataset import ParquetFragmentScanOptions, ParquetReadOptions, \ + ParquetFileFragment, ParquetFileFormat # type: ignore[possibly-unbound-attribute] except ImportError: pass try: from pyarrow.dataset import ( - OrcFileFormat # type: ignore[possibly-unbound-attribute] + OrcFileFormat # type: ignore[possibly-unbound-import] ) except ImportError: pass try: - import pyarrow.parquet as pq - from pyarrow.parquet import ParquetFileFormat \ - # type: ignore[possibly-unbound-attribute] + import pyarrow.parquet as pq \ + # type: ignore[unresolved-import] except ImportError: pass @@ -1276,7 +1276,7 @@ def test_make_fragment_with_size(s3_example_simple): fragments_with_size, format=file_format, schema=table.schema, filesystem=fs ) - with pytest.raises(pyarrow.lib.ArrowInvalid, match='Parquet 
file size is 1 bytes'): + with pytest.raises(lib.ArrowInvalid, match='Parquet file size is 1 bytes'): table = dataset_with_size.to_table() # too large sizes -> error @@ -3158,13 +3158,13 @@ def test_filter_compute_expression(tempdir, dataset_reader): _, path = _create_single_file(tempdir, table) dataset = ds.dataset(str(path)) - filter_ = pc.is_in(ds.field('A'), pa.array(["a", "b"])) + filter_ = is_in(ds.field('A'), pa.array(["a", "b"])) assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 3 - filter_ = pc.hour(ds.field('B')) >= 3 + filter_ = hour(ds.field('B')) >= 3 assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 2 - days = pc.days_between(ds.field('B'), ds.field("C")) + days = days_between(ds.field('B'), ds.field("C")) result = dataset_reader.to_table(dataset, columns={"days": days}) assert result["days"].to_pylist() == [0, 1, 2, 3, 4] @@ -3687,7 +3687,7 @@ def test_column_names_encoding(tempdir, dataset_reader): # Reading as string without specifying encoding should produce an error dataset = ds.dataset(path, format='csv', schema=expected_schema) - with pytest.raises(pyarrow.lib.ArrowInvalid, match="invalid UTF8"): + with pytest.raises(lib.ArrowInvalid, match="invalid UTF8"): dataset_reader.to_table(dataset) # Setting the encoding in the read_options should transcode the data @@ -4189,7 +4189,7 @@ def test_write_to_dataset_given_null_just_works(tempdir): def _sort_table(tab, sort_col): import pyarrow.compute as pc - sorted_indices = pc.sort_indices( + sorted_indices = sort_indices( tab, options=pc.SortOptions([(sort_col, 'ascending')])) return pc.take(tab, sorted_indices) @@ -4637,7 +4637,7 @@ def test_write_dataset_max_open_files(tempdir): def _get_compare_pair(data_source, record_batch, file_format, col_id): num_of_files_generated = _get_num_of_files_generated( base_directory=data_source, file_format=file_format) - number_of_partitions = len(pa.compute.unique(record_batch[col_id])) + number_of_partitions = len(unique(record_batch[col_id])) return num_of_files_generated, number_of_partitions # CASE 1: when max_open_files=default & max_open_files >= num_of_partitions diff --git a/python/pyarrow/tests/test_exec_plan.py b/python/pyarrow/tests/test_exec_plan.py index d85a2c21524..177f3baa378 100644 --- a/python/pyarrow/tests/test_exec_plan.py +++ b/python/pyarrow/tests/test_exec_plan.py @@ -220,13 +220,14 @@ def test_table_join_keys_order(): def test_filter_table_errors(): + from pyarrow.compute import divide # type: ignore[unresolved-attribute] t = pa.table({ "a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50] }) with pytest.raises(pa.ArrowTypeError): - _filter_table(t, pc.divide(pc.field("a"), pc.scalar(2))) + _filter_table(t, divide(pc.field("a"), pc.scalar(2))) with pytest.raises(pa.ArrowInvalid): _filter_table(t, (pc.field("Z") <= pc.scalar(2))) @@ -267,14 +268,16 @@ def test_filter_table_ordering(): def test_complex_filter_table(): + from pyarrow.compute import bit_wise_and, multiply \ + # type: ignore[unresolved-attribute] t = pa.table({ "a": [1, 2, 3, 4, 5, 6, 6], "b": [10, 20, 30, 40, 50, 60, 61] }) result = _filter_table( - t, ((pc.bit_wise_and(pc.field("a"), pc.scalar(1)) == pc.scalar(0)) & - (pc.multiply(pc.field("a"), pc.scalar(10)) == pc.field("b"))) + t, ((bit_wise_and(pc.field("a"), pc.scalar(1)) == pc.scalar(0)) & + (multiply(pc.field("a"), pc.scalar(10)) == pc.field("b"))) ) assert result == pa.table({ diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 58aabb7368e..4b1641557e7 100644 --- 
a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -158,10 +158,10 @@ def select_frame(self, func_name): m = re.search(pat, out) if m is None: pytest.fail(f"Could not select frame for function {func_name}") - - frame_num = int(m.get(1, None)) - out = self.run_command(f"frame {frame_num}") - assert f"in {func_name}" in out + else: + frame_num = int(m[1]) + out = self.run_command(f"frame {frame_num}") + assert f"in {func_name}" in out def join(self): if self.proc is not None: diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 43fd0e1ac0e..bea9a929673 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -35,6 +35,11 @@ except ImportError: pass +try: + from pyarrow import lib # type: ignore[unresolved-attribute] +except ImportError: + pass + from pyarrow.util import guid from pyarrow import Codec import pyarrow as pa @@ -812,7 +817,7 @@ def test_cache_options_pickling(pickle_module): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ pytest.param( - "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + "bz2", marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) ), "brotli", "gzip", @@ -853,7 +858,7 @@ def test_compress_decompress(compression): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ pytest.param( - "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + "bz2", marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) ), "brotli", "gzip", @@ -1725,7 +1730,7 @@ def test_output_stream_constructor(tmpdir): ]) def test_compression_detection(path, expected_compression): if not Codec.is_available(expected_compression): - with pytest.raises(pa.lib.ArrowNotImplementedError): + with pytest.raises(lib.ArrowNotImplementedError): Codec.detect(path) else: codec = Codec.detect(path) @@ -1750,7 +1755,7 @@ def test_unknown_compression_raises(): "zstd", pytest.param( "snappy", - marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) ) ]) def test_compressed_roundtrip(compression): diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index ed6e7563ed2..77018f93a24 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -33,6 +33,10 @@ import pyarrow as pa from pyarrow.tests.util import changed_environ, invoke_script +try: + from pyarrow import lib # type: ignore[unresolved-attribute] +except ImportError: + pass try: from pandas.testing import assert_frame_equal @@ -1234,7 +1238,7 @@ def __arrow_c_stream__(self, requested_schema=None): assert reader.read_all() == expected.cast(good_schema) # If schema doesn't match, raises TypeError - with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'): + with pytest.raises(lib.ArrowTypeError, match='Field 0 cannot be cast'): pa.RecordBatchReader.from_stream( wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))]) ) @@ -1271,7 +1275,7 @@ def test_record_batch_reader_cast(): # Check error for impossible cast in call to .cast() reader = pa.RecordBatchReader.from_batches(schema_src, data) - with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'): + with pytest.raises(lib.ArrowTypeError, match='Field 0 cannot be cast'): reader.cast(pa.schema([pa.field('a', pa.list_(pa.int32()))])) # Cast to same type should always work (also for types without a T->T cast function) @@ -1309,7 +1313,7 @@ def test_record_batch_reader_cast_nulls(): # when the 
batch is pulled reader = pa.RecordBatchReader.from_batches(schema_src, data_with_nulls) casted_reader = reader.cast(schema_dst) - with pytest.raises(pa.lib.ArrowInvalid, match="Can't cast array"): + with pytest.raises(lib.ArrowInvalid, match="Can't cast array"): casted_reader.read_all() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 3c3d874395e..9f15bc73c5b 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -32,7 +32,8 @@ import numpy as np import numpy.testing as npt try: - _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning + _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning \ + # type: ignore[unresolved-attribute] except AttributeError: from numpy.exceptions import ( VisibleDeprecationWarning as _np_VisibleDeprecationWarning @@ -47,6 +48,7 @@ from pyarrow.vendored.version import Version import pyarrow as pa +from pyarrow import lib # type: ignore[unresolved-attribute] try: from pyarrow import parquet as pq except ImportError: @@ -1939,7 +1941,7 @@ def test_array_of_bytes_to_strings(self): # cannot be converted to utf-8 def test_array_of_bytes_to_strings_bad_data(self): with pytest.raises( - pa.lib.ArrowInvalid, + lib.ArrowInvalid, match="was not a utf8 string"): pa.array(np.array([b'\x80\x81'], dtype=object), pa.string()) @@ -1955,13 +1957,13 @@ def test_numpy_string_array_to_fixed_size_binary(self): expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3)) assert converted.equals(expected) - with pytest.raises(pa.lib.ArrowInvalid, + with pytest.raises(lib.ArrowInvalid, match=r'Got bytestring of length 3 \(expected 4\)'): arr = np.array([b'foo', b'bar', b'baz'], dtype='|S3') pa.array(arr, type=pa.binary(4)) with pytest.raises( - pa.lib.ArrowInvalid, + lib.ArrowInvalid, match=r'Got bytestring of length 12 \(expected 3\)'): arr = np.array([b'foo', b'bar', b'baz'], dtype='|U3') pa.array(arr, type=pa.binary(3)) @@ -4432,7 +4434,8 @@ def test_convert_to_extension_array(monkeypatch): integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") \ + # type: ignore[unresolved-attribute] # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() assert len(_get_mgr(result).blocks) == 1 @@ -4478,7 +4481,8 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") \ + # type: ignore[unresolved-attribute] result = arr.to_pandas() assert _get_mgr(result).blocks[0].values.dtype == np.dtype("int64") @@ -5122,7 +5126,7 @@ def test_roundtrip_map_array_with_pydicts_duplicate_keys(): # ------------------------ # With maps as pydicts - with pytest.raises(pa.lib.ArrowException): + with pytest.raises(lib.ArrowException): # raises because of duplicate keys maps.to_pandas(maps_as_pydicts="strict") series_pydicts = maps.to_pandas(maps_as_pydicts="lossy") diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index cdbe3d00aae..407c69263e8 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -201,7 +201,7 @@ def test_timestamp_scalar(): assert b == "" c = repr(pa.scalar(datetime.datetime(2015, 1, 1), type=pa.timestamp('us'))) assert c == "" - d = repr(pc.assume_timezone( + d = 
repr(pc.assume_timezone( # type: ignore[unresolved-attribute] pa.scalar("2000-01-01").cast(pa.timestamp("s")), "America/New_York")) assert d == "" diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index dc98f03cded..48af7b143ff 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -627,11 +627,11 @@ def test_type_schema_pickling(pickle_module): pa.union([ pa.field('a', pa.int8()), pa.field('b', pa.int16()) - ], pa.lib.UnionMode_SPARSE), + ], pa.lib.UnionMode_SPARSE), # type: ignore[unresolved-attribute] pa.union([ pa.field('a', pa.int8()), pa.field('b', pa.int16()) - ], pa.lib.UnionMode_DENSE), + ], pa.lib.UnionMode_DENSE), # type: ignore[unresolved-attribute] pa.time32('s'), pa.time64('us'), pa.date32(), diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index 6a398f38ac5..89823e04943 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -26,15 +26,14 @@ import pyarrow as pa try: - import scipy from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix except ImportError: - pass + pytestmark = pytest.mark.scipy try: import sparse # type: ignore[unresolved_import] except ImportError: - pass + pytestmark = pytest.mark.pydata_sparse tensor_type_pairs = [ @@ -399,7 +398,7 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): assert np.array_equal(array, result_array) -@pytest.mark.skipif(not scipy, reason="requires scipy") +@pytest.mark.scipy @pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, @@ -441,7 +440,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, assert out_scipy_matrix.has_canonical_format -@pytest.mark.skipif(not scipy, reason="requires scipy") +@pytest.mark.scipy @pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, @@ -469,7 +468,7 @@ def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, assert np.array_equal(dense_array, sparse_tensor.to_tensor().to_numpy()) -@pytest.mark.skipif(not sparse, reason="requires pydata/sparse") +@pytest.mark.pydata_sparse @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_pydata_sparse_sparse_coo_tensor_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py index babb839b534..0fe9508aef0 100644 --- a/python/pyarrow/tests/test_strategies.py +++ b/python/pyarrow/tests/test_strategies.py @@ -19,29 +19,29 @@ import pytest -import pyarrow as pa +from pyarrow import lib # type: ignore[unresolved-attribute] import pyarrow.tests.strategies as past @h.given(past.all_types) def test_types(ty): - assert isinstance(ty, pa.lib.DataType) + assert isinstance(ty, lib.DataType) @h.given(past.all_fields) def test_fields(field): - assert isinstance(field, pa.lib.Field) + assert isinstance(field, lib.Field) @h.given(past.all_schemas) def test_schemas(schema): - assert isinstance(schema, pa.lib.Schema) + assert isinstance(schema, lib.Schema) @pytest.mark.numpy @h.given(past.all_arrays) def test_arrays(array): - assert isinstance(array, pa.lib.Array) + assert isinstance(array, lib.Array) @pytest.mark.numpy @@ 
-52,15 +52,15 @@ def test_array_nullability(array): @h.given(past.chunked_arrays(past.primitive_types)) def test_chunked_arrays(chunked_array): - assert isinstance(chunked_array, pa.lib.ChunkedArray) + assert isinstance(chunked_array, lib.ChunkedArray) @h.given(past.all_record_batches) def test_record_batches(record_bath): - assert isinstance(record_bath, pa.lib.RecordBatch) + assert isinstance(record_bath, lib.RecordBatch) @pytest.mark.numpy @h.given(past.all_tables) def test_tables(table): - assert isinstance(table, pa.lib.Table) + assert isinstance(table, lib.Table) diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index d3f5d848bce..8ac0951e489 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -22,6 +22,8 @@ import pyarrow as pa import pyarrow.compute as pc +from pyarrow.compute import equal # type: ignore[unresolved-attribute] +from pyarrow import _substrait # type: ignore[unresolved-attribute] from pyarrow.lib import tobytes # type: ignore[unresolved_import] from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError \ # type: ignore[unresolved_import] @@ -86,7 +88,7 @@ def test_run_serialized_query(tmpdir, use_threads): query = tobytes(substrait_query.replace( "FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri())) - buf = pa._substrait._parse_json_plan(query) + buf = _substrait._parse_json_plan(query) reader = substrait.run_query(buf, use_threads=use_threads) res_tb = reader.read_all() @@ -117,7 +119,7 @@ def test_invalid_plan(): ] } """ - buf = pa._substrait._parse_json_plan(tobytes(query)) + buf = _substrait._parse_json_plan(tobytes(query)) exec_message = "Plan has no relations" with pytest.raises(ArrowInvalid, match=exec_message): substrait.run_query(buf) @@ -163,7 +165,7 @@ def test_binary_conversion_with_json_options(tmpdir, use_threads): path = _write_dummy_data_to_disk(tmpdir, file_name, table) query = tobytes(substrait_query.replace( "FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri())) - buf = pa._substrait._parse_json_plan(tobytes(query)) + buf = _substrait._parse_json_plan(tobytes(query)) reader = substrait.run_query(buf, use_threads=use_threads) res_tb = reader.read_all() @@ -182,7 +184,7 @@ def has_function(fns, ext_file, fn_name): def test_get_supported_functions(): - supported_functions = pa._substrait.get_supported_functions() + supported_functions = _substrait.get_supported_functions() # It probably doesn't make sense to exhaustively verify this list but # we can check a sample aggregate and a sample non-aggregate entry assert has_function(supported_functions, @@ -233,7 +235,7 @@ def table_provider(names, schema): } """ - buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) + buf = _substrait._parse_json_plan(tobytes(substrait_query)) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -276,7 +278,7 @@ def table_provider(names, _): } """ - buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) + buf = _substrait._parse_json_plan(tobytes(substrait_query)) exec_message = "Invalid NamedTable Source" with pytest.raises(ArrowInvalid, match=exec_message): substrait.run_query(buf, table_provider=table_provider) @@ -318,7 +320,7 @@ def table_provider(names, _): } """ query = tobytes(substrait_query) - buf = pa._substrait._parse_json_plan(tobytes(query)) + buf = _substrait._parse_json_plan(tobytes(query)) exec_message = "names for NamedTable not provided" with pytest.raises(ArrowInvalid, 
match=exec_message): substrait.run_query(buf, table_provider=table_provider) @@ -437,7 +439,7 @@ def table_provider(names, _): } """ - buf = pa._substrait._parse_json_plan(substrait_query) + buf = _substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -560,7 +562,7 @@ def table_provider(names, _): } """ - buf = pa._substrait._parse_json_plan(substrait_query) + buf = _substrait._parse_json_plan(substrait_query) with pytest.raises(pa.ArrowKeyError) as excinfo: pa.substrait.run_query(buf, table_provider=table_provider) assert "No function registered" in str(excinfo.value) @@ -599,7 +601,7 @@ def table_provider(names, schema): } """ - buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) + buf = _substrait._parse_json_plan(tobytes(substrait_query)) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -745,7 +747,7 @@ def table_provider(names, _): ], } """ - buf = pa._substrait._parse_json_plan(substrait_query) + buf = _substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=False) res_tb = reader.read_all() @@ -914,7 +916,7 @@ def table_provider(names, _): ], } """ - buf = pa._substrait._parse_json_plan(substrait_query) + buf = _substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=False) res_tb = reader.read_all() @@ -930,8 +932,8 @@ def table_provider(names, _): @pytest.mark.parametrize("expr", [ - pc.equal(pc.field("x"), 7), - pc.equal(pc.field("x"), pc.field("y")), + equal(pc.field("x"), 7), + equal(pc.field("x"), pc.field("y")), pc.field("x") > 50 ]) def test_serializing_expressions(expr): @@ -986,7 +988,7 @@ def test_arrow_one_way_types(): ) def check_one_way(field): - expr = pc.is_null(pc.field(field.name)) + expr = pc.is_null(pc.field(field.name)) # type: ignore[unresolved-attribute] buf = pa.substrait.serialize_expressions([expr], ["test_expr"], schema) returned = pa.substrait.deserialize_expressions(buf) assert alt_schema == returned.schema @@ -1000,8 +1002,8 @@ def test_invalid_expression_ser_des(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - expr = pc.equal(pc.field("x"), 7) - bad_expr = pc.equal(pc.field("z"), 7) + expr = equal(pc.field("x"), 7) + bad_expr = equal(pc.field("z"), 7) # Invalid number of names with pytest.raises(ValueError) as excinfo: pa.substrait.serialize_expressions([expr], [], schema) @@ -1020,13 +1022,13 @@ def test_serializing_multiple_expressions(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - exprs = [pc.equal(pc.field("x"), 7), pc.equal(pc.field("x"), pc.field("y"))] + exprs = [equal(pc.field("x"), 7), equal(pc.field("x"), pc.field("y"))] buf = pa.substrait.serialize_expressions(exprs, ["first", "second"], schema) returned = pa.substrait.deserialize_expressions(buf) assert schema == returned.schema assert len(returned.expressions) == 2 - norm_exprs = [pc.equal(pc.field(0), 7), pc.equal(pc.field(0), pc.field(1))] + norm_exprs = [equal(pc.field(0), 7), equal(pc.field(0), pc.field(1))] assert str(returned.expressions["first"]) == str(norm_exprs[0]) assert str(returned.expressions["second"]) == str(norm_exprs[1]) @@ -1036,8 +1038,8 @@ def test_serializing_with_compute(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - expr = pc.equal(pc.field("x"), 7) - expr_norm = pc.equal(pc.field(0), 7) + expr 
= equal(pc.field("x"), 7) + expr_norm = equal(pc.field(0), 7) buf = expr.to_substrait(schema) returned = pa.substrait.deserialize_expressions(buf) @@ -1067,7 +1069,7 @@ def test_serializing_udfs(): ]) a = pc.scalar(10) b = pc.scalar(4) - exprs = [pc.shift_left(a, b)] + exprs = [pc.shift_left(a, b)] # type: ignore[unresolved-attribute] with pytest.raises(ArrowNotImplementedError): pa.substrait.serialize_expressions(exprs, ["expr"], schema) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index e7726fd0023..64624c93f1e 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -26,6 +26,7 @@ pass import pytest import pyarrow as pa +from pyarrow import lib # type: ignore[unresolved-import] import pyarrow.compute as pc from pyarrow.interchange import from_dataframe from pyarrow.vendored.version import Version @@ -49,8 +50,8 @@ def test_chunked_array_basics(): [7, 8, 9] ]) assert isinstance(data.chunks, list) - assert all(isinstance(c, pa.lib.Int64Array) for c in data.chunks) - assert all(isinstance(c, pa.lib.Int64Array) for c in data.iterchunks()) + assert all(isinstance(c, lib.Int64Array) for c in data.chunks) + assert all(isinstance(c, lib.Int64Array) for c in data.iterchunks()) assert len(data.chunks) == 3 assert data.get_total_buffer_size() == sum(c.get_total_buffer_size() for c in data.iterchunks()) @@ -650,7 +651,7 @@ def __arrow_c_stream__(self, requested_schema=None): # If schema doesn't match, raises NotImplementedError with pytest.raises( - pa.lib.ArrowTypeError, match="Field 0 cannot be cast" + lib.ArrowTypeError, match="Field 0 cannot be cast" ): pa.table( wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))]) @@ -2230,7 +2231,7 @@ def test_invalid_table_construct(): u8 = pa.uint8() arrays = [pa.array(array, type=u8), pa.array(array[1:], type=u8)] - with pytest.raises(pa.lib.ArrowInvalid): + with pytest.raises(lib.ArrowInvalid): pa.Table.from_arrays(arrays, names=["a1", "a2"]) @@ -3299,7 +3300,7 @@ def test_table_join_asof_by_length_mismatch(): }) msg = "inconsistent size of by-key across inputs" - with pytest.raises(pa.lib.ArrowInvalid, match=msg): + with pytest.raises(lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA", "colB"], tolerance=1, right_on="on", right_by=["colA"], @@ -3321,7 +3322,7 @@ def test_table_join_asof_by_type_mismatch(): }) msg = "Expected by-key type int64 but got double for field colA in input 1" - with pytest.raises(pa.lib.ArrowInvalid, match=msg): + with pytest.raises(lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA"], tolerance=1, right_on="on", right_by=["colA"], @@ -3343,7 +3344,7 @@ def test_table_join_asof_on_type_mismatch(): }) msg = "Expected on-key type int64 but got double for field on in input 1" - with pytest.raises(pa.lib.ArrowInvalid, match=msg): + with pytest.raises(lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA"], tolerance=1, right_on="on", right_by=["colA"], @@ -3470,14 +3471,14 @@ def test_invalid_non_join_column(): }) # check as left table - with pytest.raises(pa.lib.ArrowInvalid) as excinfo: + with pytest.raises(lib.ArrowInvalid) as excinfo: t1.join(t2, 'id', join_type='inner') exp_error_msg = "Data type list is not supported " \ + "in join non-key field array_column" assert exp_error_msg in str(excinfo.value) # check as right table - with pytest.raises(pa.lib.ArrowInvalid) as excinfo: + with pytest.raises(lib.ArrowInvalid) as excinfo: t2.join(t1, 'id', join_type='inner') assert exp_error_msg in 
str(excinfo.value) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 5e5f4903e29..4077b302f71 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -38,6 +38,8 @@ import pyarrow.types as types import pyarrow.tests.strategies as past +from pyarrow import lib # type: ignore[unresolved-import] + def get_many_types(): # returning them from a function is required because of pa.dictionary @@ -83,14 +85,14 @@ def get_many_types(): pa.field('b', pa.int8(), nullable=False), pa.field('c', pa.string())]), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE), + pa.field('b', pa.string())], mode=lib.UnionMode_DENSE), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE, + pa.field('b', pa.string())], mode=lib.UnionMode_DENSE, type_codes=[4, 8]), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), pa.union([pa.field('a', pa.binary(10), nullable=False), - pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), pa.dictionary(pa.int32(), pa.string()), pa.run_end_encoded(pa.int16(), pa.int32()), pa.run_end_encoded(pa.int32(), pa.string()), @@ -247,7 +249,7 @@ def test_is_nested_or_struct(): def test_is_union(): - for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]: + for mode in [lib.UnionMode_SPARSE, lib.UnionMode_DENSE]: assert types.is_union(pa.union([pa.field('a', pa.int32()), pa.field('b', pa.int8()), pa.field('c', pa.string())], @@ -353,7 +355,7 @@ def test_is_primitive(): (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30') ]) def test_tzinfo_to_string(tz, expected): - assert pa.lib.tzinfo_to_string(tz) == expected + assert lib.tzinfo_to_string(tz) == expected def test_pytz_tzinfo_to_string(): @@ -361,13 +363,13 @@ def test_pytz_tzinfo_to_string(): tz = [pytz.utc, pytz.timezone('Europe/Paris')] expected = ['UTC', 'Europe/Paris'] - assert [pa.lib.tzinfo_to_string(i) for i in tz] == expected + assert [lib.tzinfo_to_string(i) for i in tz] == expected # StaticTzInfo.tzname returns with '-09' so we need to infer the timezone's # name from the tzinfo.zone attribute tz = [pytz.timezone('Etc/GMT-9'), pytz.FixedOffset(180)] expected = ['Etc/GMT-9', '+03:00'] - assert [pa.lib.tzinfo_to_string(i) for i in tz] == expected + assert [lib.tzinfo_to_string(i) for i in tz] == expected @pytest.mark.timezone_data @@ -381,9 +383,9 @@ def test_dateutil_tzinfo_to_string(): import dateutil.tz tz = dateutil.tz.UTC - assert pa.lib.tzinfo_to_string(tz) == 'UTC' + assert lib.tzinfo_to_string(tz) == 'UTC' tz = dateutil.tz.gettz('Europe/Paris') - assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' + assert lib.tzinfo_to_string(tz) == 'Europe/Paris' @pytest.mark.timezone_data @@ -395,20 +397,20 @@ def test_zoneinfo_tzinfo_to_string(): pytest.importorskip('tzdata') tz = zoneinfo.ZoneInfo('UTC') - assert pa.lib.tzinfo_to_string(tz) == 'UTC' + assert lib.tzinfo_to_string(tz) == 'UTC' tz = zoneinfo.ZoneInfo('Europe/Paris') - assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' + assert lib.tzinfo_to_string(tz) == 'Europe/Paris' def test_tzinfo_to_string_errors(): msg = "Not an instance of datetime.tzinfo" with pytest.raises(TypeError): - pa.lib.tzinfo_to_string("Europe/Budapest") + lib.tzinfo_to_string("Europe/Budapest") tz = 
datetime.timezone(datetime.timedelta(hours=1, seconds=30)) msg = "Offset must represent whole number of minutes" with pytest.raises(ValueError, match=msg): - pa.lib.tzinfo_to_string(tz) + lib.tzinfo_to_string(tz) if tzst: @@ -421,8 +423,8 @@ def test_tzinfo_to_string_errors(): def test_pytz_timezone_roundtrip(tz): if tz is None: pytest.skip('requires timezone not None') - timezone_string = pa.lib.tzinfo_to_string(tz) - timezone_tzinfo = pa.lib.string_to_tzinfo(timezone_string) + timezone_string = lib.tzinfo_to_string(tz) + timezone_tzinfo = lib.string_to_tzinfo(timezone_string) assert timezone_tzinfo == tz @@ -482,14 +484,14 @@ def tzname(self, dt): def utcoffset(self, dt): return None - assert pa.lib.tzinfo_to_string(CorrectTimezone1()) == "-02:30" - assert pa.lib.tzinfo_to_string(CorrectTimezone2()) == "+03:00" + assert lib.tzinfo_to_string(CorrectTimezone1()) == "-02:30" + assert lib.tzinfo_to_string(CorrectTimezone2()) == "+03:00" msg = (r"Object returned by tzinfo.utcoffset\(None\) is not an instance " r"of datetime.timedelta") for wrong in [BuggyTimezone1(), BuggyTimezone2(), BuggyTimezone3()]: with pytest.raises(ValueError, match=msg): - pa.lib.tzinfo_to_string(wrong) + lib.tzinfo_to_string(wrong) def test_string_to_tzinfo(): @@ -499,7 +501,7 @@ def test_string_to_tzinfo(): expected = [pytz.utc, pytz.timezone('Europe/Paris'), pytz.FixedOffset(180), pytz.FixedOffset(90), pytz.FixedOffset(-120)] - result = [pa.lib.string_to_tzinfo(i) for i in string] + result = [lib.string_to_tzinfo(i) for i in string] assert result == expected except ImportError: @@ -511,7 +513,7 @@ def test_string_to_tzinfo(): datetime.timezone( datetime.timedelta(hours=1, minutes=30)), datetime.timezone(-datetime.timedelta(hours=2))] - result = [pa.lib.string_to_tzinfo(i) for i in string] + result = [lib.string_to_tzinfo(i) for i in string] assert result == expected except ImportError: @@ -525,8 +527,8 @@ def test_timezone_string_roundtrip_pytz(): pytz.utc, pytz.timezone('America/New_York')] name = ['+01:30', '-01:30', 'UTC', 'America/New_York'] - assert [pa.lib.tzinfo_to_string(i) for i in tz] == name - assert [pa.lib.string_to_tzinfo(i)for i in name] == tz + assert [lib.tzinfo_to_string(i) for i in tz] == name + assert [lib.string_to_tzinfo(i)for i in name] == tz def test_timestamp(): @@ -797,13 +799,13 @@ def check_fields(ty, fields): sparse_factories = [ partial(pa.union, mode='sparse'), - partial(pa.union, mode=pa.lib.UnionMode_SPARSE), + partial(pa.union, mode=lib.UnionMode_SPARSE), pa.sparse_union, ] dense_factories = [ partial(pa.union, mode='dense'), - partial(pa.union, mode=pa.lib.UnionMode_DENSE), + partial(pa.union, mode=lib.UnionMode_DENSE), pa.dense_union, ] diff --git a/python/pyproject.toml b/python/pyproject.toml index 8e7f14d3c46..9c16ee08892 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -111,7 +111,7 @@ fallback_version = '22.0.0a0' #possibly-unbound-import = "ignore" #too-many-positional-arguments = "ignore" #unknown-argument = "ignore" -unresolved-attribute = "ignore" +#unresolved-attribute = "ignore" #unresolved-global = "ignore" #unresolved-import = "ignore" #unresolved-reference = "ignore" diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py index e99c4751680..9ce8cbb2ba7 100644 --- a/python/scripts/test_leak.py +++ b/python/scripts/test_leak.py @@ -71,7 +71,7 @@ def func(): writer.close() buf_reader = pa.BufferReader(sink.getvalue()) - reader = pa.open_file(buf_reader) + reader = pa.ipc.open_file(buf_reader) reader.read_all() assert_does_not_leak(func, 
iterations=50, tolerance=50) From 98f258b5b3fcbe6262445c0c8ede6e693873bd19 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 26 Jul 2025 17:37:33 +0200 Subject: [PATCH 31/32] Revert test changes --- .../tests/interchange/test_conversion.py | 2 +- .../interchange/test_interchange_spec.py | 8 +- python/pyarrow/tests/parquet/common.py | 4 +- python/pyarrow/tests/parquet/test_basic.py | 11 +- .../parquet/test_compliant_nested_type.py | 6 +- .../pyarrow/tests/parquet/test_data_types.py | 9 +- python/pyarrow/tests/parquet/test_dataset.py | 6 +- python/pyarrow/tests/parquet/test_datetime.py | 6 +- .../pyarrow/tests/parquet/test_encryption.py | 3 +- python/pyarrow/tests/parquet/test_metadata.py | 12 +- python/pyarrow/tests/parquet/test_pandas.py | 29 +- .../tests/parquet/test_parquet_file.py | 10 +- .../tests/parquet/test_parquet_writer.py | 4 +- python/pyarrow/tests/strategies.py | 10 +- python/pyarrow/tests/test_acero.py | 25 +- .../pyarrow/tests/test_adhoc_memory_leak.py | 2 +- python/pyarrow/tests/test_array.py | 30 +- python/pyarrow/tests/test_builder.py | 3 +- python/pyarrow/tests/test_cffi.py | 7 +- python/pyarrow/tests/test_compute.py | 1110 ++++++++--------- python/pyarrow/tests/test_convert_builtin.py | 2 +- python/pyarrow/tests/test_cpp_internals.py | 2 +- python/pyarrow/tests/test_csv.py | 5 +- python/pyarrow/tests/test_cuda.py | 9 +- .../pyarrow/tests/test_cuda_numba_interop.py | 20 +- python/pyarrow/tests/test_cython.py | 4 +- python/pyarrow/tests/test_dataset.py | 174 ++- .../pyarrow/tests/test_dataset_encryption.py | 38 +- python/pyarrow/tests/test_exec_plan.py | 9 +- python/pyarrow/tests/test_extension_type.py | 8 +- python/pyarrow/tests/test_feather.py | 17 +- python/pyarrow/tests/test_flight.py | 48 +- python/pyarrow/tests/test_fs.py | 51 +- python/pyarrow/tests/test_gandiva.py | 24 +- python/pyarrow/tests/test_gdb.py | 8 +- python/pyarrow/tests/test_io.py | 15 +- python/pyarrow/tests/test_ipc.py | 12 +- python/pyarrow/tests/test_json.py | 6 +- python/pyarrow/tests/test_jvm.py | 23 +- python/pyarrow/tests/test_misc.py | 2 +- python/pyarrow/tests/test_pandas.py | 55 +- python/pyarrow/tests/test_scalars.py | 4 +- python/pyarrow/tests/test_schema.py | 6 +- python/pyarrow/tests/test_sparse_tensor.py | 15 +- python/pyarrow/tests/test_strategies.py | 16 +- python/pyarrow/tests/test_substrait.py | 55 +- python/pyarrow/tests/test_table.py | 23 +- python/pyarrow/tests/test_types.py | 57 +- python/pyarrow/tests/test_udf.py | 6 +- python/pyarrow/tests/util.py | 3 +- python/pyarrow/tests/wsgi_examples.py | 2 +- python/pyproject.toml | 3 + 52 files changed, 949 insertions(+), 1070 deletions(-) diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py index a584f379738..50da6693aff 100644 --- a/python/pyarrow/tests/interchange/test_conversion.py +++ b/python/pyarrow/tests/interchange/test_conversion.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow.interchange as pi from pyarrow.interchange.column import ( diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py index 14e2aab4bfb..cea694d1c1e 100644 --- a/python/pyarrow/tests/interchange/test_interchange_spec.py +++ b/python/pyarrow/tests/interchange/test_interchange_spec.py @@ -18,14 +18,14 @@ import ctypes import hypothesis as h import hypothesis.strategies as st -import pyarrow as pa -import pyarrow.tests.strategies as past -import pytest +import pytest 
try: import numpy as np except ImportError: - pass + np = None +import pyarrow as pa +import pyarrow.tests.strategies as past all_types = st.deferred( diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 8ce804262d1..4f5946649b8 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -20,7 +20,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa from pyarrow.tests import util @@ -41,7 +41,7 @@ def _write_table(table, path, **kwargs): def _read_table(*args, **kwargs): import pyarrow.parquet as pq - table = pq.read_table(*args, **kwargs) # type: ignore[missing-argument] + table = pq.read_table(*args, **kwargs) table.validate(full=True) return table diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 7a0dfcde270..67515c5e247 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -16,6 +16,7 @@ # under the License. import os +from collections import OrderedDict import io import warnings from shutil import copytree @@ -33,7 +34,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pass + pq = None try: @@ -43,12 +44,12 @@ from pyarrow.tests.pandas_examples import dataframe_with_lists from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pass + pd = tm = None try: import numpy as np except ImportError: - pass + np = None # Marks all of the tests in this module # Ignore these with pytest ... -m 'not parquet' @@ -229,11 +230,11 @@ def test_empty_table_no_columns(): def test_write_nested_zero_length_array_chunk_failure(): # Bug report in ARROW-3792 - cols = dict( + cols = OrderedDict( int32=pa.int32(), list_string=pa.list_(pa.string()) ) - data = [[], [dict(int32=1, list_string=('G',)), ]] + data = [[], [OrderedDict(int32=1, list_string=('G',)), ]] # This produces a table with a column like # )> diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index d7388be8a1b..2345855a332 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -24,13 +24,15 @@ from pyarrow.tests.parquet.common import (_read_table, _check_roundtrip) except ImportError: - pass + pq = None try: import pandas as pd + import pandas.testing as tm + from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pass + pd = tm = None # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 9f8f5212382..c546bc1532a 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -33,7 +33,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pass + pq = None try: @@ -44,7 +44,7 @@ dataframe_with_lists) from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pass + pd = tm = None # Marks all of the tests in this module @@ -390,8 +390,7 @@ def test_parquet_nested_convenience(tempdir): read = pq.read_table( path, columns=['a']) - tm.assert_frame_equal(read.to_pandas(), 
df[['a']]) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(read.to_pandas(), df[['a']]) read = pq.read_table( path, columns=['a', 'b']) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index 1e6897f703d..b8939443c1d 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import unittest.mock as mock @@ -40,7 +40,7 @@ from pyarrow.tests.parquet.common import ( _read_table, _test_dataframe, _write_table) except ImportError: - pass + pq = None try: @@ -48,7 +48,7 @@ import pandas.testing as tm except ImportError: - pass + pd = tm = None # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index 7a95debca3f..b89fd97cb91 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -22,7 +22,7 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -32,7 +32,7 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _read_table, _write_table except ImportError: - pass + pq = None try: @@ -41,7 +41,7 @@ from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe except ImportError: - pass + pd = tm = None # Marks all of the tests in this module diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index 5815d65c8d8..a11a4935a1c 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -22,7 +22,8 @@ import pyarrow.parquet as pq import pyarrow.parquet.encryption as pe except ImportError: - pass + pq = None + pe = None else: from pyarrow.tests.parquet.encryption import ( InMemoryKmsClient, verify_file_encrypted) diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 3386f77bb1a..148bfebaa67 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -35,14 +35,16 @@ import pyarrow.parquet as pq from pyarrow.tests.parquet.common import _write_table except ImportError: - pass + pq = None try: import pandas as pd + import pandas.testing as tm + from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pass + pd = tm = None # Marks all of the tests in this module @@ -494,12 +496,12 @@ def test_multi_dataset_metadata(tempdir): # Write merged metadata-only file with open(metapath, "wb") as f: - _meta.write_metadata_file(f) # type: ignore[possibly-unbound-attribute] + _meta.write_metadata_file(f) # Read back the metadata meta = pq.read_metadata(metapath) md = meta.to_dict() - _md = _meta.to_dict() # type: ignore[possibly-unbound-attribute] + _md = _meta.to_dict() for key in _md: if key != 'serialized_size': assert _md[key] == md[key] diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index edc7a2610eb..703232b7cac 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -16,12 +16,12 @@ # under the License. 
import io -from json import loads as json_loads +import json try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -34,7 +34,7 @@ from pyarrow.tests.parquet.common import (_read_table, _test_dataframe, _write_table) except ImportError: - pass + pq = None try: @@ -44,7 +44,7 @@ from pyarrow.tests.parquet.common import (_roundtrip_pandas_dataframe, alltypes_sample) except ImportError: - pass + pd = tm = None # Marks all of the tests in this module @@ -65,7 +65,7 @@ def test_pandas_parquet_custom_metadata(tempdir): metadata = pq.read_metadata(filename).metadata assert b'pandas' in metadata - js = json_loads(metadata[b'pandas'].decode('utf8')) + js = json.loads(metadata[b'pandas'].decode('utf8')) assert js['index_columns'] == [{'kind': 'range', 'name': None, 'start': 0, 'stop': 10000, @@ -260,8 +260,7 @@ def test_pandas_parquet_configuration_options(tempdir): for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4', 'ZSTD']: if (compression != 'NONE' and - not pa.lib.Codec.is_available(compression)): \ - # type: ignore[unresolved-attribute] + not pa.lib.Codec.is_available(compression)): continue _write_table(arrow_table, filename, version='2.6', compression=compression) @@ -426,8 +425,7 @@ def test_backwards_compatible_column_metadata_handling(datadir): table = _read_table( path, columns=['a']) result = table.to_pandas() - tm.assert_frame_equal(result, expected[['a']].reset_index( - drop=True)) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) @pytest.mark.pandas @@ -487,7 +485,7 @@ def test_pandas_categorical_roundtrip(): codes = np.array([2, 0, 0, 2, 0, -1, 2], dtype='int32') categories = ['foo', 'bar', 'baz'] df = pd.DataFrame({'x': pd.Categorical.from_codes( - codes, categories=pd.Index(categories))}) + codes, categories=categories)}) buf = pa.BufferOutputStream() pq.write_table(pa.table(df), buf) @@ -532,18 +530,15 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir): table, str(tempdir / "case1"), partition_cols=['part'], ) result = pq.read_table(str(tempdir / "case1")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) pq.write_to_dataset(table, str(tempdir / "case2")) result = pq.read_table(str(tempdir / "case2")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) pq.write_table(table, str(tempdir / "data.parquet")) result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() - tm.assert_frame_equal(result[["col"]], df[["col"]]) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result[["col"]], df[["col"]]) @pytest.mark.pandas @@ -560,7 +555,7 @@ def test_write_to_dataset_pandas_preserve_index(tempdir): table, str(tempdir / "case1"), partition_cols=['part'], ) result = pq.read_table(str(tempdir / "case1")).to_pandas() - tm.assert_frame_equal(result, df_cat) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, df_cat) pq.write_to_dataset(table, str(tempdir / "case2")) result = pq.read_table(str(tempdir / "case2")).to_pandas() diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 4d4b467e9d3..24ffe612ef7 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -30,13 +30,15 @@ import 
pyarrow.parquet as pq from pyarrow.tests.parquet.common import _write_table except ImportError: - pass + pq = None try: + import pandas as pd import pandas.testing as tm + from pyarrow.tests.parquet.common import alltypes_sample except ImportError: - pass + pd = tm = None # Marks all of the tests in this module @@ -325,7 +327,7 @@ def test_parquet_file_with_filesystem(s3_example_fs, use_uri): table = pa.table({"a": range(10)}) pq.write_table(table, s3_path, filesystem=s3_fs) - parquet_file = pq.ParquetFile(*args, **kwargs) # type: ignore[missing-argument] + parquet_file = pq.ParquetFile(*args, **kwargs) assert parquet_file.read() == table assert not parquet_file.closed parquet_file.close() @@ -406,7 +408,7 @@ def test_parquet_file_hugginface_support(): pytest.skip("fsspec is not installed, skipping Hugging Face test") fake_hf_module = types.ModuleType("huggingface_hub") - fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[unresolved-attribute] + fake_hf_module.HfFileSystem = MemoryFileSystem with mock.patch.dict("sys.modules", {"huggingface_hub": fake_hf_module}): uri = "hf://datasets/apache/arrow/test.parquet" table = pa.table({"a": range(10)}) diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index d6f30ea16be..d1e9e874ba1 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -25,7 +25,7 @@ from pyarrow.tests.parquet.common import (_read_table, _test_dataframe, _range_integers) except ImportError: - pass + pq = None try: @@ -33,7 +33,7 @@ import pandas.testing as tm except ImportError: - pass + pd = tm = None # Marks all of the tests in this module diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 07ebaa771f1..450cce74f1d 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -24,24 +24,24 @@ try: import hypothesis.extra.numpy as npst except ImportError: - pass + npst = None try: import hypothesis.extra.pytz as tzst except ImportError: - pass + tzst = None try: import zoneinfo except ImportError: - pass + zoneinfo = None if sys.platform == 'win32': try: import tzdata # noqa:F401 except ImportError: - pass + zoneinfo = None try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py index bbec49c5360..cb97e3849fd 100644 --- a/python/pyarrow/tests/test_acero.py +++ b/python/pyarrow/tests/test_acero.py @@ -19,8 +19,7 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.compute import field, multiply, sum, equal, all as pc_all \ - # type: ignore[unresolved-import] +from pyarrow.compute import field try: from pyarrow.acero import ( @@ -38,9 +37,9 @@ try: import pyarrow.dataset as ds - from pyarrow.acero import ScanNodeOptions # type: ignore[possibly-unbound-import] + from pyarrow.acero import ScanNodeOptions except ImportError: - pass + ds = None pytestmark = pytest.mark.acero @@ -122,7 +121,7 @@ def test_filter(table_source): ]) def test_filter_all_rows(source): # GH-46057: filtering all rows should return empty RecordBatch with same schema - result_expr = source.filter(field("number") < 0) + result_expr = source.filter(pc.field("number") < 0) assert result_expr.num_rows == 0 assert type(result_expr) is type(source) @@ -139,7 +138,7 @@ def test_project(table_source): # default name from expression decl = Declaration.from_sequence([ table_source, 
- Declaration("project", ProjectNodeOptions([multiply(field("a"), 2)])) + Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)])) ]) result = decl.to_table() assert result.schema.names == ["multiply(a, 2)"] @@ -148,7 +147,7 @@ def test_project(table_source): # provide name decl = Declaration.from_sequence([ table_source, - Declaration("project", ProjectNodeOptions([multiply(field("a"), 2)], ["a2"])) + Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2"])) ]) result = decl.to_table() assert result.schema.names == ["a2"] @@ -156,12 +155,12 @@ def test_project(table_source): # input validation with pytest.raises(ValueError): - ProjectNodeOptions([multiply(field("a"), 2)], ["a2", "b2"]) + ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2", "b2"]) # no scalar expression decl = Declaration.from_sequence([ table_source, - Declaration("project", ProjectNodeOptions([sum(field("a"))])) + Declaration("project", ProjectNodeOptions([pc.sum(field("a"))])) ]) with pytest.raises(ValueError, match="cannot Execute non-scalar expression"): _ = decl.to_table() @@ -371,7 +370,7 @@ def test_hash_join_with_residual_filter(): join_opts = HashJoinNodeOptions( "inner", left_keys="key", right_keys="key", - filter_expression=equal(field('a'), 5)) + filter_expression=pc.equal(pc.field('a'), 5)) joined = Declaration( "hashjoin", options=join_opts, inputs=[left_source, right_source]) result = joined.to_table() @@ -383,7 +382,7 @@ def test_hash_join_with_residual_filter(): # test filter expression referencing columns from both side join_opts = HashJoinNodeOptions( "left outer", left_keys="key", right_keys="key", - filter_expression=equal(field("a"), 5) | equal(field("b"), 10) + filter_expression=pc.equal(pc.field("a"), 5) | pc.equal(pc.field("b"), 10) ) joined = Declaration( "hashjoin", options=join_opts, inputs=[left_source, right_source]) @@ -488,10 +487,10 @@ def test_scan(tempdir): # projection scan option - scan_opts = ScanNodeOptions(dataset, columns={"a2": multiply(field("a"), 2)}) + scan_opts = ScanNodeOptions(dataset, columns={"a2": pc.multiply(field("a"), 2)}) decl = Declaration("scan", scan_opts) result = decl.to_table() # "a" is included in the result (needed later on for the actual projection) assert result["a"].to_pylist() == [1, 2, 3] # "b" is still included, but without data as it will be removed by the projection - assert pc_all(result["b"].is_null()).as_py() + assert pc.all(result["b"].is_null()).as_py() diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py index 9f61bc7ddfe..76a766984da 100644 --- a/python/pyarrow/tests/test_adhoc_memory_leak.py +++ b/python/pyarrow/tests/test_adhoc_memory_leak.py @@ -20,7 +20,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa import pyarrow.tests.util as test_util diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 6ab39dd8716..009ab1e849b 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -30,11 +30,7 @@ try: import numpy as np except ImportError: - pass -try: - from pyarrow import lib # type: ignore[unresolved-import] -except ImportError: - pass + np = None import pyarrow as pa import pyarrow.tests.strategies as past @@ -327,7 +323,7 @@ def test_asarray(): np_arr = np.asarray([_ for _ in arr]) assert np_arr.tolist() == [0, 1, 2, 3] assert np_arr.dtype == np.dtype('O') - assert isinstance(np_arr[0], lib.Int64Value) + assert isinstance(np_arr[0], 
pa.lib.Int64Value) # Calling with the arrow array gives back an array with 'int64' dtype np_arr = np.asarray(arr) @@ -554,9 +550,7 @@ def test_arange(): for case in cases: result = pa.arange(*case) result.validate(full=True) - - assert result.equals(pa.array(list(range(*case)), type=pa.int64())) \ - # type: ignore[no-matching-overload] + assert result.equals(pa.array(list(range(*case)), type=pa.int64())) # Validate memory_pool keyword argument result = pa.arange(-1, 101, memory_pool=pa.default_memory_pool()) @@ -1912,9 +1906,9 @@ def test_cast_from_null(): out_types = [ pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=lib.UnionMode_DENSE), + pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), ] in_arr = pa.array(in_data, type=pa.null()) for out_type in out_types: @@ -3227,8 +3221,8 @@ def test_struct_array_field(): x2 = a.field('x') y2 = a.field('y') - assert isinstance(x0, lib.Int16Array) - assert isinstance(y1, lib.FloatArray) + assert isinstance(x0, pa.lib.Int16Array) + assert isinstance(y1, pa.lib.FloatArray) assert x0.equals(pa.array([1, 3, 5], type=pa.int16())) assert y0.equals(pa.array([2.5, 4.5, 6.5], type=pa.float32())) assert x0.equals(x1) @@ -3262,8 +3256,8 @@ def test_struct_array_flattened_field(): x2 = a._flattened_field('x') y2 = a._flattened_field('y') - assert isinstance(x0, lib.Int16Array) - assert isinstance(y1, lib.FloatArray) + assert isinstance(x0, pa.lib.Int16Array) + assert isinstance(y1, pa.lib.FloatArray) assert x0.equals(pa.array([1, None, 5], type=pa.int16())) assert y0.equals(pa.array([2.5, None, 6.5], type=pa.float32())) assert x0.equals(x1) @@ -3311,7 +3305,7 @@ def test_empty_cast(): # ARROW-4766: Ensure that supported types conversion don't segfault # on empty arrays of common types pa.array([], type=t1).cast(t2) - except (lib.ArrowNotImplementedError, pa.ArrowInvalid): + except (pa.lib.ArrowNotImplementedError, pa.ArrowInvalid): continue @@ -4107,7 +4101,7 @@ def test_list_view_from_arrays_fails(list_array_type, list_type_factory): mask = pa.array([False, False, True]) # Ambiguous to specify both validity map and offsets or sizes with nulls - with pytest.raises(lib.ArrowInvalid): + with pytest.raises(pa.lib.ArrowInvalid): list_array_type.from_arrays(offsets, sizes, values, mask=mask) offsets = [0, 1, 1] @@ -4115,7 +4109,7 @@ def test_list_view_from_arrays_fails(list_array_type, list_type_factory): array_slice = array[1:] # List offsets and sizes must not be slices if a validity map is specified - with pytest.raises(lib.ArrowInvalid): + with pytest.raises(pa.lib.ArrowInvalid): list_array_type.from_arrays( array_slice.offsets, array_slice.sizes, array_slice.values, mask=array_slice.is_null()) diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py index 65ca1458d0c..9187a19b5fc 100644 --- a/python/pyarrow/tests/test_builder.py +++ b/python/pyarrow/tests/test_builder.py @@ -19,8 +19,7 @@ import weakref import pyarrow as pa -from pyarrow.lib import StringBuilder, StringViewBuilder \ - # type: ignore[unresolved_import] +from pyarrow.lib import StringBuilder, StringViewBuilder def test_weakref(): diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 306225dbf69..84290a6b880 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -24,7 +24,7 @@ try: from pyarrow.cffi 
import ffi except ImportError: - pass + ffi = None import pytest @@ -32,7 +32,7 @@ import pandas as pd import pandas.testing as tm except ImportError: - pass + pd = tm = None needs_cffi = pytest.mark.skipif(ffi is None, @@ -676,8 +676,7 @@ def test_roundtrip_reader_capsule(constructor): obj = constructor(schema, batches) bad_schema = pa.schema({'ints': pa.int32()}) - with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"): \ - # type: ignore[unresolved-attribute] + with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"): obj.__arrow_c_stream__(bad_schema.__arrow_c_schema__()) # Can work with matching schema diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index e9afe643994..ad61dbc48a7 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -27,56 +27,25 @@ import random import sys import textwrap -from pyarrow import lib # type: ignore[unresolved-import] -from pyarrow.compute import \ - sum as pc_sum, mode, variance, skew, kurtosis, count_substring, \ - count_substring_regex, find_substring, find_substring_regex, match_like, \ - match_substring, match_substring_regex, utf8_trim_whitespace, \ - ascii_trim_whitespace, utf8_trim, utf8_slice_codeunits, binary_slice, \ - split_pattern, utf8_split_whitespace, ascii_split_whitespace, \ - split_pattern_regex, any as pc_any, all as pc_all, filter, min_max, \ - choose, utf8_is_printable, ascii_center, ascii_lpad, ascii_rpad, utf8_center, \ - utf8_lpad, utf8_rpad, binary_replace_slice, utf8_replace_slice, \ - replace_substring, replace_substring_regex, extract_regex, extract_regex_span, \ - binary_join, binary_join_element_wise, not_equal, less, less_equal, greater, \ - greater_equal, equal, round_to_multiple, round_binary, is_null, or_kleene, \ - is_valid, and_, and_kleene, or_, xor, invert, dictionary_decode, \ - dictionary_encode, strptime, strftime, year as pc_year, \ - is_leap_year as pc_is_leap_year, month as pc_month, day as pc_day, \ - day_of_year as pc_day_of_year, iso_year as pc_iso_year, iso_week as pc_iso_week, \ - iso_calendar as pc_iso_calendar, quarter as pc_quarter, hour as pc_hour, \ - minute as pc_minute, second as pc_second, millisecond as pc_millisecond, \ - microsecond as pc_microsecond, nanosecond as pc_nanosecond, \ - subsecond as pc_subsecond, local_timestamp as pc_local_timestamp, \ - is_dst as pc_is_dst, day_of_week as pc_day_of_week, \ - week as pc_week, \ - assume_timezone as pc_assume_timezone, count, ceil_temporal, floor_temporal, \ - round_temporal, partition_nth_indices, select_k_unstable, array_sort_indices, \ - sort_indices, is_in, index_in, quantile, tdigest, cumulative_sum, \ - cumulative_prod, max_element_wise, min_element_wise, cumulative_min, \ - cumulative_max, map_lookup, struct_field, case_when, make_struct, list_element, \ - count_distinct, utf8_normalize, rank, rank_quantile, rank_normal, negate, \ - subtract, divide, multiply, power, sqrt, exp, cos, sin, tan, acos, atan, \ - asin, atan2, sinh, cosh, tanh, asinh, acosh, atanh, abs as pc_abs, sign, \ - bit_wise_not, bit_wise_and, \ - bit_wise_or, bit_wise_xor, is_nan, is_finite, coalesce, hour, round as pc_round, \ - add as pc_add, cast, list_slice, run_end_decode, run_end_encode, pairwise_diff, \ - pairwise_diff_checked, pivot_wider, winsorize # type: ignore[unresolved-import] try: import numpy as np except ImportError: - pass + np = None try: import pandas as pd except ImportError: - pass + pd = None import pyarrow as pa import 
pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError # type: ignore[unresolved_import] +from pyarrow.lib import ArrowNotImplementedError +try: + import pyarrow.substrait as pas +except ImportError: + pas = None exported_functions = [ func for (name, func) in sorted(pc.__dict__.items()) @@ -359,36 +328,36 @@ def test_function_attributes(): def test_input_type_conversion(): # Automatic array conversion from Python - arr = pc_add([1, 2], [4, None]) + arr = pc.add([1, 2], [4, None]) assert arr.to_pylist() == [5, None] # Automatic scalar conversion from Python - arr = pc_add([1, 2], 4) + arr = pc.add([1, 2], 4) assert arr.to_pylist() == [5, 6] # Other scalar type - assert equal(["foo", "bar", None], - "foo").to_pylist() == [True, False, None] + assert pc.equal(["foo", "bar", None], + "foo").to_pylist() == [True, False, None] @pytest.mark.parametrize('arrow_type', numerical_arrow_types) def test_sum_array(arrow_type): arr = pa.array([1, 2, 3, 4], type=arrow_type) assert arr.sum().as_py() == 10 - assert pc_sum(arr).as_py() == 10 + assert pc.sum(arr).as_py() == 10 arr = pa.array([1, 2, 3, 4, None], type=arrow_type) assert arr.sum().as_py() == 10 - assert pc_sum(arr).as_py() == 10 + assert pc.sum(arr).as_py() == 10 arr = pa.array([None], type=arrow_type) assert arr.sum().as_py() is None # noqa: E711 - assert pc_sum(arr).as_py() is None # noqa: E711 + assert pc.sum(arr).as_py() is None # noqa: E711 assert arr.sum(min_count=0).as_py() == 0 - assert pc_sum(arr, min_count=0).as_py() == 0 + assert pc.sum(arr, min_count=0).as_py() == 0 arr = pa.array([], type=arrow_type) assert arr.sum().as_py() is None # noqa: E711 assert arr.sum(min_count=0).as_py() == 0 - assert pc_sum(arr, min_count=0).as_py() == 0 + assert pc.sum(arr, min_count=0).as_py() == 0 @pytest.mark.parametrize("arrow_type", [pa.decimal128(3, 2), pa.decimal256(3, 2)]) @@ -437,24 +406,24 @@ def test_sum_decimal_array(arrow_type): @pytest.mark.parametrize('arrow_type', numerical_arrow_types) def test_sum_chunked_array(arrow_type): arr = pa.chunked_array([pa.array([1, 2, 3, 4], type=arrow_type)]) - assert pc_sum(arr).as_py() == 10 + assert pc.sum(arr).as_py() == 10 arr = pa.chunked_array([ pa.array([1, 2], type=arrow_type), pa.array([3, 4], type=arrow_type) ]) - assert pc_sum(arr).as_py() == 10 + assert pc.sum(arr).as_py() == 10 arr = pa.chunked_array([ pa.array([1, 2], type=arrow_type), pa.array([], type=arrow_type), pa.array([3, 4], type=arrow_type) ]) - assert pc_sum(arr).as_py() == 10 + assert pc.sum(arr).as_py() == 10 arr = pa.chunked_array((), type=arrow_type) assert arr.num_chunks == 0 - assert pc_sum(arr).as_py() is None # noqa: E711 - assert pc_sum(arr, min_count=0).as_py() == 0 + assert pc.sum(arr).as_py() is None # noqa: E711 + assert pc.sum(arr, min_count=0).as_py() == 0 @pytest.mark.parametrize('arrow_type', [pa.decimal128(3, 2), pa.decimal256(3, 2)]) @@ -473,77 +442,77 @@ def test_sum_chunked_array_decimal_type(arrow_type): pa.array([Decimal("1.23"), Decimal("4.56")], type=arrow_type) ] ) - assert pc_sum(arr).as_py() == expected_sum - assert pc_sum(arr).type == max_precision_type + assert pc.sum(arr).as_py() == expected_sum + assert pc.sum(arr).type == max_precision_type arr = pa.chunked_array([ pa.array([Decimal("1.23")], type=arrow_type), pa.array([Decimal("4.56")], type=arrow_type) ]) - assert pc_sum(arr).as_py() == expected_sum - assert pc_sum(arr).type == max_precision_type + assert pc.sum(arr).as_py() == expected_sum + assert pc.sum(arr).type == max_precision_type arr = pa.chunked_array([ 
pa.array([Decimal("1.23")], type=arrow_type), pa.array([], type=arrow_type), pa.array([Decimal("4.56")], type=arrow_type) ]) - assert pc_sum(arr).as_py() == expected_sum - assert pc_sum(arr).type == max_precision_type + assert pc.sum(arr).as_py() == expected_sum + assert pc.sum(arr).type == max_precision_type arr = pa.chunked_array((), type=arrow_type) assert arr.num_chunks == 0 - assert pc_sum(arr).as_py() is None # noqa: E711 - assert pc_sum(arr).type == max_precision_type - assert pc_sum(arr, min_count=0).as_py() == zero - assert pc_sum(arr, min_count=0).type == max_precision_type + assert pc.sum(arr).as_py() is None # noqa: E711 + assert pc.sum(arr).type == max_precision_type + assert pc.sum(arr, min_count=0).as_py() == zero + assert pc.sum(arr, min_count=0).type == max_precision_type def test_mode_array(): # ARROW-9917 - data = pa.array([1, 1, 3, 4, 3, 5], type='int64') - arr = mode(data) - assert len(arr) == 1 - assert arr[0].as_py() == {"mode": 1, "count": 2} - - arr = mode(data, n=2) - assert len(arr) == 2 - assert arr[0].as_py() == {"mode": 1, "count": 2} - assert arr[1].as_py() == {"mode": 3, "count": 2} - - data = pa.array([], type='int64') - assert len(mode(data)) == 0 - - data = pa.array([1, 1, 3, 4, 3, None], type='int64') - arr = mode(data, skip_nulls=False) - assert len(arr) == 0 - arr = mode(data, min_count=6) - assert len(arr) == 0 - arr = mode(data, skip_nulls=False, min_count=5) - assert len(arr) == 0 - - data = pa.array([True, False]) - arr = mode(data, n=2) - assert len(arr) == 2 - assert arr[0].as_py() == {"mode": False, "count": 1} - assert arr[1].as_py() == {"mode": True, "count": 1} + arr = pa.array([1, 1, 3, 4, 3, 5], type='int64') + mode = pc.mode(arr) + assert len(mode) == 1 + assert mode[0].as_py() == {"mode": 1, "count": 2} + + mode = pc.mode(arr, n=2) + assert len(mode) == 2 + assert mode[0].as_py() == {"mode": 1, "count": 2} + assert mode[1].as_py() == {"mode": 3, "count": 2} + + arr = pa.array([], type='int64') + assert len(pc.mode(arr)) == 0 + + arr = pa.array([1, 1, 3, 4, 3, None], type='int64') + mode = pc.mode(arr, skip_nulls=False) + assert len(mode) == 0 + mode = pc.mode(arr, min_count=6) + assert len(mode) == 0 + mode = pc.mode(arr, skip_nulls=False, min_count=5) + assert len(mode) == 0 + + arr = pa.array([True, False]) + mode = pc.mode(arr, n=2) + assert len(mode) == 2 + assert mode[0].as_py() == {"mode": False, "count": 1} + assert mode[1].as_py() == {"mode": True, "count": 1} def test_mode_chunked_array(): # ARROW-9917 - data = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')]) - arr = mode(data) - assert len(arr) == 1 - assert arr[0].as_py() == {"mode": 1, "count": 2} + arr = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')]) + mode = pc.mode(arr) + assert len(mode) == 1 + assert mode[0].as_py() == {"mode": 1, "count": 2} - arr = mode(data, n=2) - assert len(arr) == 2 - assert arr[0].as_py() == {"mode": 1, "count": 2} - assert arr[1].as_py() == {"mode": 3, "count": 2} + mode = pc.mode(arr, n=2) + assert len(mode) == 2 + assert mode[0].as_py() == {"mode": 1, "count": 2} + assert mode[1].as_py() == {"mode": 3, "count": 2} arr = pa.chunked_array((), type='int64') assert arr.num_chunks == 0 - assert len(mode(arr)) == 0 + assert len(pc.mode(arr)) == 0 def test_empty_chunked_array(): @@ -556,23 +525,23 @@ def test_empty_chunked_array(): def test_variance(): data = [1, 2, 3, 4, 5, 6, 7, 8] - assert variance(data).as_py() == 5.25 - assert variance(data, ddof=0).as_py() == 5.25 - assert variance(data, ddof=1).as_py() == 6.0 + 
assert pc.variance(data).as_py() == 5.25 + assert pc.variance(data, ddof=0).as_py() == 5.25 + assert pc.variance(data, ddof=1).as_py() == 6.0 def test_skew(): data = [1, 1, None, 2] - assert skew(data).as_py() == pytest.approx(0.707106781186548, rel=1e-10) - assert skew(data, skip_nulls=False).as_py() is None - assert skew(data, min_count=4).as_py() is None + assert pc.skew(data).as_py() == pytest.approx(0.707106781186548, rel=1e-10) + assert pc.skew(data, skip_nulls=False).as_py() is None + assert pc.skew(data, min_count=4).as_py() is None def test_kurtosis(): data = [1, 1, None, 2] - assert kurtosis(data).as_py() == pytest.approx(-1.5, rel=1e-10) - assert kurtosis(data, skip_nulls=False).as_py() is None - assert kurtosis(data, min_count=4).as_py() is None + assert pc.kurtosis(data).as_py() == pytest.approx(-1.5, rel=1e-10) + assert pc.kurtosis(data, skip_nulls=False).as_py() is None + assert pc.kurtosis(data, min_count=4).as_py() is None @pytest.mark.parametrize("input, expected", ( @@ -585,8 +554,8 @@ def test_kurtosis(): ([1, 40], {'skew': None, 'kurtosis': None}), )) def test_unbiased_skew_and_kurtosis(input, expected): - arrow_skew = skew(input, skip_nulls=True, biased=False) - arrow_kurtosis = kurtosis(input, skip_nulls=True, biased=False) + arrow_skew = pc.skew(input, skip_nulls=True, biased=False) + arrow_kurtosis = pc.kurtosis(input, skip_nulls=True, biased=False) assert arrow_skew.as_py() == expected['skew'] assert arrow_kurtosis.as_py() == expected['kurtosis'] @@ -596,11 +565,11 @@ def test_count_substring(): (pa.large_string(), pa.int64())]: arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None], type=ty) - result = count_substring(arr, "ab") + result = pc.count_substring(arr, "ab") expected = pa.array([1, 1, 2, 0, 0, None], type=offset) assert expected == result - result = count_substring(arr, "ab", ignore_case=True) + result = pc.count_substring(arr, "ab", ignore_case=True) expected = pa.array([1, 1, 2, 0, 1, None], type=offset) assert expected == result @@ -610,11 +579,11 @@ def test_count_substring_regex(): (pa.large_string(), pa.int64())]: arr = pa.array(["ab", "cab", "baAacaa", "ba", "AB", None], type=ty) - result = count_substring_regex(arr, "a+") + result = pc.count_substring_regex(arr, "a+") expected = pa.array([1, 1, 3, 1, 0, None], type=offset) assert expected.equals(result) - result = count_substring_regex(arr, "a+", ignore_case=True) + result = pc.count_substring_regex(arr, "a+", ignore_case=True) expected = pa.array([1, 1, 2, 1, 1, None], type=offset) assert expected.equals(result) @@ -622,61 +591,61 @@ def test_count_substring_regex(): def test_find_substring(): for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]: arr = pa.array(["ab", "cab", "ba", None], type=ty) - result = find_substring(arr, "ab") + result = pc.find_substring(arr, "ab") assert result.to_pylist() == [0, 1, -1, None] - result = find_substring_regex(arr, "a?b") + result = pc.find_substring_regex(arr, "a?b") assert result.to_pylist() == [0, 1, 0, None] arr = pa.array(["ab*", "cAB*", "ba", "aB?"], type=ty) - result = find_substring(arr, "aB*", ignore_case=True) + result = pc.find_substring(arr, "aB*", ignore_case=True) assert result.to_pylist() == [0, 1, -1, -1] - result = find_substring_regex(arr, "a?b", ignore_case=True) + result = pc.find_substring_regex(arr, "a?b", ignore_case=True) assert result.to_pylist() == [0, 1, 0, 0] def test_match_like(): arr = pa.array(["ab", "ba%", "ba", "ca%d", None]) - result = match_like(arr, r"_a\%%") + result = pc.match_like(arr, 
r"_a\%%") expected = pa.array([False, True, False, True, None]) assert expected.equals(result) arr = pa.array(["aB", "bA%", "ba", "ca%d", None]) - result = match_like(arr, r"_a\%%", ignore_case=True) + result = pc.match_like(arr, r"_a\%%", ignore_case=True) expected = pa.array([False, True, False, True, None]) assert expected.equals(result) - result = match_like(arr, r"_a\%%", ignore_case=False) + result = pc.match_like(arr, r"_a\%%", ignore_case=False) expected = pa.array([False, False, False, True, None]) assert expected.equals(result) def test_match_substring(): arr = pa.array(["ab", "abc", "ba", None]) - result = match_substring(arr, "ab") + result = pc.match_substring(arr, "ab") expected = pa.array([True, True, False, None]) assert expected.equals(result) arr = pa.array(["áB", "Ábc", "ba", None]) - result = match_substring(arr, "áb", ignore_case=True) + result = pc.match_substring(arr, "áb", ignore_case=True) expected = pa.array([True, True, False, None]) assert expected.equals(result) - result = match_substring(arr, "áb", ignore_case=False) + result = pc.match_substring(arr, "áb", ignore_case=False) expected = pa.array([False, False, False, None]) assert expected.equals(result) def test_match_substring_regex(): arr = pa.array(["ab", "abc", "ba", "c", None]) - result = match_substring_regex(arr, "^a?b") + result = pc.match_substring_regex(arr, "^a?b") expected = pa.array([True, True, True, False, None]) assert expected.equals(result) arr = pa.array(["aB", "Abc", "BA", "c", None]) - result = match_substring_regex(arr, "^a?b", ignore_case=True) + result = pc.match_substring_regex(arr, "^a?b", ignore_case=True) expected = pa.array([True, True, True, False, None]) assert expected.equals(result) - result = match_substring_regex(arr, "^a?b", ignore_case=False) + result = pc.match_substring_regex(arr, "^a?b", ignore_case=False) expected = pa.array([False, False, False, False, None]) assert expected.equals(result) @@ -684,21 +653,21 @@ def test_match_substring_regex(): def test_trim(): # \u3000 is unicode whitespace arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = utf8_trim_whitespace(arr) + result = pc.utf8_trim_whitespace(arr) expected = pa.array(["foo", None, "foo bar"]) assert expected.equals(result) arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = ascii_trim_whitespace(arr) + result = pc.ascii_trim_whitespace(arr) expected = pa.array(["foo", None, "\u3000foo bar"]) assert expected.equals(result) arr = pa.array([" foo", None, " \u3000foo bar \t"]) - result = utf8_trim(arr, characters=' f\u3000') + result = pc.utf8_trim(arr, characters=' f\u3000') expected = pa.array(["oo", None, "oo bar \t"]) assert expected.equals(result) # Positional option - result = utf8_trim(arr, ' f\u3000') + result = pc.utf8_trim(arr, ' f\u3000') expected = pa.array(["oo", None, "oo bar \t"]) assert expected.equals(result) @@ -710,12 +679,12 @@ def test_slice_compatibility(): for step in [-3, -2, -1, 1, 2, 3]: expected = pa.array([k.as_py()[start:stop:step] for k in arr]) - result = utf8_slice_codeunits( + result = pc.utf8_slice_codeunits( arr, start=start, stop=stop, step=step) assert expected.equals(result) # Positional options - assert utf8_slice_codeunits(arr, - start, stop, step) == result + assert pc.utf8_slice_codeunits(arr, + start, stop, step) == result def test_binary_slice_compatibility(): @@ -728,113 +697,113 @@ def test_binary_slice_compatibility(): continue expected = pa.array([k.as_py()[start:stop:step] for k in arr]) - result = binary_slice( + result = pc.binary_slice( 
arr, start=start, stop=stop, step=step) assert expected.equals(result) # Positional options - assert binary_slice(arr, start, stop, step) == result + assert pc.binary_slice(arr, start, stop, step) == result # Fixed size binary input / output for item in data: fsb_scalar = pa.scalar(item, type=pa.binary(len(item))) expected = item[start:stop:step] - actual = binary_slice(fsb_scalar, start, stop, step) + actual = pc.binary_slice(fsb_scalar, start, stop, step) assert actual.type == pa.binary(len(expected)) assert actual.as_py() == expected def test_split_pattern(): arr = pa.array(["-foo---bar--", "---foo---b"]) - result = split_pattern(arr, pattern="---") + result = pc.split_pattern(arr, pattern="---") expected = pa.array([["-foo", "bar--"], ["", "foo", "b"]]) assert expected.equals(result) - result = split_pattern(arr, "---", max_splits=1) + result = pc.split_pattern(arr, "---", max_splits=1) expected = pa.array([["-foo", "bar--"], ["", "foo---b"]]) assert expected.equals(result) - result = split_pattern(arr, "---", max_splits=1, reverse=True) + result = pc.split_pattern(arr, "---", max_splits=1, reverse=True) expected = pa.array([["-foo", "bar--"], ["---foo", "b"]]) assert expected.equals(result) def test_split_whitespace_utf8(): arr = pa.array(["foo bar", " foo \u3000\tb"]) - result = utf8_split_whitespace(arr) + result = pc.utf8_split_whitespace(arr) expected = pa.array([["foo", "bar"], ["", "foo", "b"]]) assert expected.equals(result) - result = utf8_split_whitespace(arr, max_splits=1) + result = pc.utf8_split_whitespace(arr, max_splits=1) expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]]) assert expected.equals(result) - result = utf8_split_whitespace(arr, max_splits=1, reverse=True) + result = pc.utf8_split_whitespace(arr, max_splits=1, reverse=True) expected = pa.array([["foo", "bar"], [" foo", "b"]]) assert expected.equals(result) def test_split_whitespace_ascii(): arr = pa.array(["foo bar", " foo \u3000\tb"]) - result = ascii_split_whitespace(arr) + result = pc.ascii_split_whitespace(arr) expected = pa.array([["foo", "bar"], ["", "foo", "\u3000", "b"]]) assert expected.equals(result) - result = ascii_split_whitespace(arr, max_splits=1) + result = pc.ascii_split_whitespace(arr, max_splits=1) expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]]) assert expected.equals(result) - result = ascii_split_whitespace(arr, max_splits=1, reverse=True) + result = pc.ascii_split_whitespace(arr, max_splits=1, reverse=True) expected = pa.array([["foo", "bar"], [" foo \u3000", "b"]]) assert expected.equals(result) def test_split_pattern_regex(): arr = pa.array(["-foo---bar--", "---foo---b"]) - result = split_pattern_regex(arr, pattern="-+") + result = pc.split_pattern_regex(arr, pattern="-+") expected = pa.array([["", "foo", "bar", ""], ["", "foo", "b"]]) assert expected.equals(result) - result = split_pattern_regex(arr, "-+", max_splits=1) + result = pc.split_pattern_regex(arr, "-+", max_splits=1) expected = pa.array([["", "foo---bar--"], ["", "foo---b"]]) assert expected.equals(result) with pytest.raises(NotImplementedError, match="Cannot split in reverse with regex"): - result = split_pattern_regex( + result = pc.split_pattern_regex( arr, pattern="---", max_splits=1, reverse=True) def test_min_max(): # An example generated function wrapper with possible options data = [4, 5, 6, None, 1] - s = min_max(data) + s = pc.min_max(data) assert s.as_py() == {'min': 1, 'max': 6} - s = min_max(data, options=pc.ScalarAggregateOptions()) + s = pc.min_max(data, 
options=pc.ScalarAggregateOptions()) assert s.as_py() == {'min': 1, 'max': 6} - s = min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True)) + s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True)) assert s.as_py() == {'min': 1, 'max': 6} - s = min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False)) + s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False)) assert s.as_py() == {'min': None, 'max': None} # Options as dict of kwargs - s = min_max(data, options={'skip_nulls': False}) + s = pc.min_max(data, options={'skip_nulls': False}) assert s.as_py() == {'min': None, 'max': None} # Options as named functions arguments - s = min_max(data, skip_nulls=False) + s = pc.min_max(data, skip_nulls=False) assert s.as_py() == {'min': None, 'max': None} # Both options and named arguments with pytest.raises(TypeError): - s = min_max( + s = pc.min_max( data, options=pc.ScalarAggregateOptions(), skip_nulls=False) # Wrong options type options = pc.TakeOptions() with pytest.raises(TypeError): - s = min_max(data, options=options) + s = pc.min_max(data, options=options) # Missing argument with pytest.raises(TypeError, match="min_max takes 1 positional"): - s = min_max() + s = pc.min_max() def test_any(): @@ -843,17 +812,17 @@ def test_any(): options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0) a = pa.array([], type='bool') - assert pc_any(a).as_py() is None - assert pc_any(a, min_count=0).as_py() is False - assert pc_any(a, options=options).as_py() is False + assert pc.any(a).as_py() is None + assert pc.any(a, min_count=0).as_py() is False + assert pc.any(a, options=options).as_py() is False a = pa.array([False, None, True]) - assert pc_any(a).as_py() is True - assert pc_any(a, options=options).as_py() is True + assert pc.any(a).as_py() is True + assert pc.any(a, options=options).as_py() is True a = pa.array([False, None, False]) - assert pc_any(a).as_py() is False - assert pc_any(a, options=options).as_py() is None + assert pc.any(a).as_py() is False + assert pc.any(a, options=options).as_py() is None def test_all(): @@ -862,39 +831,39 @@ def test_all(): options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0) a = pa.array([], type='bool') - assert pc_all(a).as_py() is None - assert pc_all(a, min_count=0).as_py() is True - assert pc_all(a, options=options).as_py() is True + assert pc.all(a).as_py() is None + assert pc.all(a, min_count=0).as_py() is True + assert pc.all(a, options=options).as_py() is True a = pa.array([False, True]) - assert pc_all(a).as_py() is False - assert pc_all(a, options=options).as_py() is False + assert pc.all(a).as_py() is False + assert pc.all(a, options=options).as_py() is False a = pa.array([True, None]) - assert pc_all(a).as_py() is True - assert pc_all(a, options=options).as_py() is None + assert pc.all(a).as_py() is True + assert pc.all(a, options=options).as_py() is None a = pa.chunked_array([[True], [True, None]]) - assert pc_all(a).as_py() is True - assert pc_all(a, options=options).as_py() is None + assert pc.all(a).as_py() is True + assert pc.all(a, options=options).as_py() is None a = pa.chunked_array([[True], [False]]) - assert pc_all(a).as_py() is False - assert pc_all(a, options=options).as_py() is False + assert pc.all(a).as_py() is False + assert pc.all(a, options=options).as_py() is False def test_is_valid(): # An example generated function wrapper without options data = [4, 5, None] - assert is_valid(data).to_pylist() == [True, True, False] + assert pc.is_valid(data).to_pylist() == 
[True, True, False] with pytest.raises(TypeError): - is_valid(data, options=None) + pc.is_valid(data, options=None) def test_generated_docstrings(): # With options - assert min_max.__doc__ == textwrap.dedent("""\ + assert pc.min_max.__doc__ == textwrap.dedent("""\ Compute the minimum and maximum values of a numeric array. Null values are ignored by default. @@ -916,7 +885,7 @@ def test_generated_docstrings(): If not passed, will allocate memory from the default memory pool. """) # Without options - assert pc_add.__doc__ == textwrap.dedent("""\ + assert pc.add.__doc__ == textwrap.dedent("""\ Add the arguments element-wise. Results will wrap around on integer overflow. @@ -933,7 +902,7 @@ def test_generated_docstrings(): If not passed, will allocate memory from the default memory pool. """) # Varargs with options - assert min_element_wise.__doc__ == textwrap.dedent("""\ + assert pc.min_element_wise.__doc__ == textwrap.dedent("""\ Find the element-wise minimum value. Nulls are ignored (by default) or propagated. @@ -951,7 +920,7 @@ def test_generated_docstrings(): memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """) - assert filter.__doc__ == textwrap.dedent("""\ + assert pc.filter.__doc__ == textwrap.dedent("""\ Filter with a boolean selection filter. The output is populated with values from the input at positions @@ -998,24 +967,24 @@ def test_generated_signatures(): # options and their default values. # Without options - sig = inspect.signature(pc_add) + sig = inspect.signature(pc.add) assert str(sig) == "(x, y, /, *, memory_pool=None)" # With options - sig = inspect.signature(min_max) + sig = inspect.signature(pc.min_max) assert str(sig) == ("(array, /, *, skip_nulls=True, min_count=1, " "options=None, memory_pool=None)") # With positional options - sig = inspect.signature(quantile) + sig = inspect.signature(pc.quantile) assert str(sig) == ("(array, /, q=0.5, *, interpolation='linear', " "skip_nulls=True, min_count=0, " "options=None, memory_pool=None)") # Varargs with options - sig = inspect.signature(binary_join_element_wise) + sig = inspect.signature(pc.binary_join_element_wise) assert str(sig) == ("(*strings, null_handling='emit_null', " "null_replacement='', options=None, " "memory_pool=None)") # Varargs without options - sig = inspect.signature(choose) + sig = inspect.signature(pc.choose) assert str(sig) == "(indices, /, *values, memory_pool=None)" # Nullary with options sig = inspect.signature(pc.random) @@ -1032,7 +1001,7 @@ def find_new_unicode_codepoints(): new = set() characters = [chr(c) for c in range(0x80, 0x11000) if not (0xD800 <= c < 0xE000)] - is_printable = utf8_is_printable(pa.array(characters)).to_pylist() + is_printable = pc.utf8_is_printable(pa.array(characters)).to_pylist() for i, c in enumerate(characters): if is_printable[i] != c.isprintable(): new.add(ord(c)) @@ -1152,20 +1121,20 @@ def test_string_py_compat_boolean(function_name, variant): def test_pad(): arr = pa.array([None, 'a', 'abcd']) - assert ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd'] - assert ascii_lpad(arr, width=3).tolist() == [None, ' a', 'abcd'] - assert ascii_rpad(arr, width=3).tolist() == [None, 'a ', 'abcd'] - assert ascii_center(arr, 3).tolist() == [None, ' a ', 'abcd'] - assert ascii_lpad(arr, 3).tolist() == [None, ' a', 'abcd'] - assert ascii_rpad(arr, 3).tolist() == [None, 'a ', 'abcd'] + assert pc.ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd'] + assert pc.ascii_lpad(arr, width=3).tolist() == [None, ' 
a', 'abcd']
+    assert pc.ascii_rpad(arr, width=3).tolist() == [None, 'a  ', 'abcd']
+    assert pc.ascii_center(arr, 3).tolist() == [None, ' a ', 'abcd']
+    assert pc.ascii_lpad(arr, 3).tolist() == [None, '  a', 'abcd']
+    assert pc.ascii_rpad(arr, 3).tolist() == [None, 'a  ', 'abcd']

     arr = pa.array([None, 'á', 'abcd'])
-    assert utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd']
-    assert utf8_lpad(arr, width=3).tolist() == [None, '  á', 'abcd']
-    assert utf8_rpad(arr, width=3).tolist() == [None, 'á  ', 'abcd']
-    assert utf8_center(arr, 3).tolist() == [None, ' á ', 'abcd']
-    assert utf8_lpad(arr, 3).tolist() == [None, '  á', 'abcd']
-    assert utf8_rpad(arr, 3).tolist() == [None, 'á  ', 'abcd']
+    assert pc.utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd']
+    assert pc.utf8_lpad(arr, width=3).tolist() == [None, '  á', 'abcd']
+    assert pc.utf8_rpad(arr, width=3).tolist() == [None, 'á  ', 'abcd']
+    assert pc.utf8_center(arr, 3).tolist() == [None, ' á ', 'abcd']
+    assert pc.utf8_lpad(arr, 3).tolist() == [None, '  á', 'abcd']
+    assert pc.utf8_rpad(arr, 3).tolist() == [None, 'á  ', 'abcd']


 def test_utf8_zfill():
@@ -1208,53 +1177,53 @@ def test_replace_slice():
     for start in offsets:
         for stop in offsets:
             expected = series.str.slice_replace(start, stop, 'XX')
-            actual = binary_replace_slice(
+            actual = pc.binary_replace_slice(
                 arr, start=start, stop=stop, replacement='XX')
             assert actual.tolist() == expected.tolist()
             # Positional options
-            assert binary_replace_slice(arr, start, stop, 'XX') == actual
+            assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual

     arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde'])
     series = arr.to_pandas().astype(object).replace({np.nan: None})
     for start in offsets:
         for stop in offsets:
             expected = series.str.slice_replace(start, stop, 'XX')
-            actual = utf8_replace_slice(
+            actual = pc.utf8_replace_slice(
                 arr, start=start, stop=stop, replacement='XX')
             assert actual.tolist() == expected.tolist()


 def test_replace_plain():
     data = pa.array(['foozfoo', 'food', None])
-    ar = replace_substring(data, pattern='foo', replacement='bar')
+    ar = pc.replace_substring(data, pattern='foo', replacement='bar')
     assert ar.tolist() == ['barzbar', 'bard', None]
-    ar = replace_substring(data, 'foo', 'bar')
+    ar = pc.replace_substring(data, 'foo', 'bar')
     assert ar.tolist() == ['barzbar', 'bard', None]

-    ar = replace_substring(data, pattern='foo', replacement='bar',
-                           max_replacements=1)
+    ar = pc.replace_substring(data, pattern='foo', replacement='bar',
+                              max_replacements=1)
     assert ar.tolist() == ['barzfoo', 'bard', None]
-    ar = replace_substring(data, 'foo', 'bar', max_replacements=1)
+    ar = pc.replace_substring(data, 'foo', 'bar', max_replacements=1)
     assert ar.tolist() == ['barzfoo', 'bard', None]


 def test_replace_regex():
     data = pa.array(['foo', 'mood', None])
     expected = ['f00', 'm00d', None]
-    ar = replace_substring_regex(data, pattern='(.)oo', replacement=r'\100')
+    ar = pc.replace_substring_regex(data, pattern='(.)oo', replacement=r'\100')
     assert ar.tolist() == expected
-    ar = replace_substring_regex(data, '(.)oo', replacement=r'\100')
+    ar = pc.replace_substring_regex(data, '(.)oo', replacement=r'\100')
     assert ar.tolist() == expected
-    ar = replace_substring_regex(data, '(.)oo', r'\100')
+    ar = pc.replace_substring_regex(data, '(.)oo', r'\100')
     assert ar.tolist() == expected


 def test_extract_regex():
     ar = pa.array(['a1', 'zb2z'])
     expected = [{'letter': 'a', 'digit': '1'}, {'letter': 'b', 'digit': '2'}]
-    struct = extract_regex(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d)')
+    struct = 
pc.extract_regex(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d)')
     assert struct.tolist() == expected
-    struct = extract_regex(ar, r'(?P<letter>[ab])(?P<digit>\d)')
+    struct = pc.extract_regex(ar, r'(?P<letter>[ab])(?P<digit>\d)')
     assert struct.tolist() == expected
@@ -1262,50 +1231,50 @@ def test_extract_regex_span():
     ar = pa.array(['a1', 'zb234z'])
     expected = [{'letter': [0, 1], 'digit': [1, 1]},
                 {'letter': [1, 1], 'digit': [2, 3]}]
-    struct = extract_regex_span(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d+)')
+    struct = pc.extract_regex_span(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d+)')
     assert struct.tolist() == expected
-    struct = extract_regex_span(ar, r'(?P<letter>[ab])(?P<digit>\d+)')
+    struct = pc.extract_regex_span(ar, r'(?P<letter>[ab])(?P<digit>\d+)')
     assert struct.tolist() == expected


 def test_binary_join():
     ar_list = pa.array([['foo', 'bar'], None, []])
     expected = pa.array(['foo-bar', None, ''])
-    assert binary_join(ar_list, '-').equals(expected)
+    assert pc.binary_join(ar_list, '-').equals(expected)

     separator_array = pa.array(['1', '2'], type=pa.binary())
     expected = pa.array(['a1b', 'c2d'], type=pa.binary())
     ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary()))
-    assert binary_join(ar_list, separator_array).equals(expected)
+    assert pc.binary_join(ar_list, separator_array).equals(expected)


 def test_binary_join_element_wise():
     null = pa.scalar(None, type=pa.string())
     arrs = [[None, 'a', 'b'], ['c', None, 'd'], [None, '-', '--']]
-    assert binary_join_element_wise(*arrs).to_pylist() == \
+    assert pc.binary_join_element_wise(*arrs).to_pylist() == \
         [None, None, 'b--d']
-    assert binary_join_element_wise('a', 'b', '-').as_py() == 'a-b'
-    assert binary_join_element_wise('a', null, '-').as_py() is None
-    assert binary_join_element_wise('a', 'b', null).as_py() is None
+    assert pc.binary_join_element_wise('a', 'b', '-').as_py() == 'a-b'
+    assert pc.binary_join_element_wise('a', null, '-').as_py() is None
+    assert pc.binary_join_element_wise('a', 'b', null).as_py() is None

     skip = pc.JoinOptions(null_handling='skip')
-    assert binary_join_element_wise(*arrs, options=skip).to_pylist() == \
+    assert pc.binary_join_element_wise(*arrs, options=skip).to_pylist() == \
         [None, 'a', 'b--d']
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', 'b', '-', options=skip).as_py() == 'a-b'
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', null, '-', options=skip).as_py() == 'a'
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', 'b', null, options=skip).as_py() is None

     replace = pc.JoinOptions(null_handling='replace', null_replacement='spam')
-    assert binary_join_element_wise(*arrs, options=replace).to_pylist() == \
+    assert pc.binary_join_element_wise(*arrs, options=replace).to_pylist() == \
         [None, 'a-spam', 'b--d']
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', 'b', '-', options=replace).as_py() == 'a-b'
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', null, '-', options=replace).as_py() == 'a-spam'
-    assert binary_join_element_wise(
+    assert pc.binary_join_element_wise(
         'a', 'b', null, options=replace).as_py() is None
@@ -1633,22 +1602,22 @@ def con(values):
     arr1 = con([1, 2, 3, 4, None])
     arr2 = con([1, 1, 4, None, 4])

-    result = equal(arr1, arr2)
+    result = pc.equal(arr1, arr2)
     assert result.equals(con([True, False, False, None, None]))

-    result = not_equal(arr1, arr2)
+    result = pc.not_equal(arr1, arr2)
     assert result.equals(con([False, True, True, None, None]))

-    result = less(arr1, arr2)
+    result = pc.less(arr1, arr2)
     assert result.equals(con([False, 
False, True, None, None])) - result = less_equal(arr1, arr2) + result = pc.less_equal(arr1, arr2) assert result.equals(con([True, False, True, None, None])) - result = greater(arr1, arr2) + result = pc.greater(arr1, arr2) assert result.equals(con([False, True, False, None, None])) - result = greater_equal(arr1, arr2) + result = pc.greater_equal(arr1, arr2) assert result.equals(con([True, True, False, None, None])) @@ -1664,28 +1633,28 @@ def con(values): arr = con(['a', 'b', 'c', None]) scalar = pa.scalar('b') - result = equal(arr, scalar) + result = pc.equal(arr, scalar) assert result.equals(con([False, True, False, None])) if typ == "array": nascalar = pa.scalar(None, type="string") - result = equal(arr, nascalar) - isnull = is_null(result) + result = pc.equal(arr, nascalar) + isnull = pc.is_null(result) assert isnull.equals(con([True, True, True, True])) - result = not_equal(arr, scalar) + result = pc.not_equal(arr, scalar) assert result.equals(con([True, False, True, None])) - result = less(arr, scalar) + result = pc.less(arr, scalar) assert result.equals(con([True, False, False, None])) - result = less_equal(arr, scalar) + result = pc.less_equal(arr, scalar) assert result.equals(con([True, True, False, None])) - result = greater(arr, scalar) + result = pc.greater(arr, scalar) assert result.equals(con([False, False, True, None])) - result = greater_equal(arr, scalar) + result = pc.greater_equal(arr, scalar) assert result.equals(con([False, True, True, None])) @@ -1701,27 +1670,27 @@ def con(values): arr = con([1, 2, 3, None]) scalar = pa.scalar(2) - result = equal(arr, scalar) + result = pc.equal(arr, scalar) assert result.equals(con([False, True, False, None])) if typ == "array": nascalar = pa.scalar(None, type="int64") - result = equal(arr, nascalar) + result = pc.equal(arr, nascalar) assert result.to_pylist() == [None, None, None, None] - result = not_equal(arr, scalar) + result = pc.not_equal(arr, scalar) assert result.equals(con([True, False, True, None])) - result = less(arr, scalar) + result = pc.less(arr, scalar) assert result.equals(con([True, False, False, None])) - result = less_equal(arr, scalar) + result = pc.less_equal(arr, scalar) assert result.equals(con([True, True, False, None])) - result = greater(arr, scalar) + result = pc.greater(arr, scalar) assert result.equals(con([False, False, True, None])) - result = greater_equal(arr, scalar) + result = pc.greater_equal(arr, scalar) assert result.equals(con([False, True, True, None])) @@ -1737,14 +1706,14 @@ def test_compare_chunked_array_mixed(): (arr_chunked, arr), (arr_chunked, arr_chunked2), ]: - result = equal(left, right) + result = pc.equal(left, right) assert result.equals(expected) def test_arithmetic_add(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = pc_add(left, right) + result = pc.add(left, right) expected = pa.array([1, 1, 4, 6, 8]) assert result.equals(expected) @@ -1752,7 +1721,7 @@ def test_arithmetic_add(): def test_arithmetic_subtract(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = subtract(left, right) + result = pc.subtract(left, right) expected = pa.array([1, 3, 2, 2, 2]) assert result.equals(expected) @@ -1760,7 +1729,7 @@ def test_arithmetic_subtract(): def test_arithmetic_multiply(): left = pa.array([1, 2, 3, 4, 5]) right = pa.array([0, -1, 1, 2, 3]) - result = multiply(left, right) + result = pc.multiply(left, right) expected = pa.array([0, -2, 3, 8, 15]) assert result.equals(expected) @@ -1768,10 +1737,10 @@ def 
test_arithmetic_multiply(): @pytest.mark.parametrize("ty", ["round", "round_to_multiple"]) def test_round_to_integer(ty): if ty == "round": - round_func = pc_round + round = pc.round RoundOptions = partial(pc.RoundOptions, ndigits=0) elif ty == "round_to_multiple": - round_func = round_to_multiple + round = pc.round_to_multiple RoundOptions = partial(pc.RoundToMultipleOptions, multiple=1) values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None] @@ -1789,7 +1758,7 @@ def test_round_to_integer(ty): } for round_mode, expected in rmode_and_expected.items(): options = RoundOptions(round_mode=round_mode) - result = round_func(values, options=options) + result = round(values, options=options) expected_array = pa.array(expected, type=pa.float64()) assert expected_array.equals(result) @@ -1806,11 +1775,11 @@ def test_round(): } for ndigits, expected in ndigits_and_expected.items(): options = pc.RoundOptions(ndigits, "half_towards_infinity") - result = pc_round(values, options=options) + result = pc.round(values, options=options) np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) - assert pc_round(values, ndigits, + assert pc.round(values, ndigits, round_mode="half_towards_infinity") == result - assert pc_round(values, ndigits, "half_towards_infinity") == result + assert pc.round(values, ndigits, "half_towards_infinity") == result @pytest.mark.numpy @@ -1826,19 +1795,19 @@ def test_round_to_multiple(): } for multiple, expected in multiple_and_expected.items(): options = pc.RoundToMultipleOptions(multiple, "half_towards_infinity") - result = round_to_multiple(values, options=options) + result = pc.round_to_multiple(values, options=options) np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) - assert round_to_multiple(values, multiple, - "half_towards_infinity") == result + assert pc.round_to_multiple(values, multiple, + "half_towards_infinity") == result for multiple in [0, -2, pa.scalar(-10.4)]: with pytest.raises(pa.ArrowInvalid, match="Rounding multiple must be positive"): - round_to_multiple(values, multiple=multiple) + pc.round_to_multiple(values, multiple=multiple) for multiple in [object, 99999999999999999999999]: with pytest.raises(TypeError, match="is not a valid multiple type"): - round_to_multiple(values, multiple=multiple) + pc.round_to_multiple(values, multiple=multiple) def test_round_binary(): @@ -1846,15 +1815,15 @@ def test_round_binary(): scales = pa.array([-3, -2, -1, 0, 1, 2, 3], pa.int32()) expected = pa.array( [0, 200, 350, 457, 123.5, 234.57, 345.678], pa.float64()) - assert round_binary(values, scales) == expected + assert pc.round_binary(values, scales) == expected expect_zero = pa.scalar(0, pa.float64()) expect_inf = pa.scalar(10, pa.float64()) scale = pa.scalar(-1, pa.int32()) - assert round_binary( + assert pc.round_binary( 5.0, scale, round_mode="half_towards_zero") == expect_zero - assert round_binary( + assert pc.round_binary( 5.0, scale, round_mode="half_towards_infinity") == expect_inf @@ -1863,11 +1832,11 @@ def test_is_null(): result = arr.is_null() expected = pa.array([False, False, False, True]) assert result.equals(expected) - assert result.equals(is_null(arr)) + assert result.equals(pc.is_null(arr)) result = arr.is_valid() expected = pa.array([True, True, True, False]) assert result.equals(expected) - assert result.equals(is_valid(arr)) + assert result.equals(pc.is_valid(arr)) arr = pa.chunked_array([[1, 2], [3, None]]) result = arr.is_null() @@ -1987,27 +1956,27 @@ def test_logical(): a = pa.array([True, False, False, None]) 
b = pa.array([True, True, False, True]) - assert and_(a, b) == pa.array([True, False, False, None]) - assert and_kleene(a, b) == pa.array([True, False, False, None]) + assert pc.and_(a, b) == pa.array([True, False, False, None]) + assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) - assert or_(a, b) == pa.array([True, True, False, None]) - assert or_kleene(a, b) == pa.array([True, True, False, True]) + assert pc.or_(a, b) == pa.array([True, True, False, None]) + assert pc.or_kleene(a, b) == pa.array([True, True, False, True]) - assert xor(a, b) == pa.array([False, True, False, None]) + assert pc.xor(a, b) == pa.array([False, True, False, None]) - assert invert(a) == pa.array([False, True, True, None]) + assert pc.invert(a) == pa.array([False, True, True, None]) def test_dictionary_decode(): array = pa.array(["a", "a", "b", "c", "b"]) dictionary_array = array.dictionary_encode() - dictionary_array_decode = dictionary_decode(dictionary_array) + dictionary_array_decode = pc.dictionary_decode(dictionary_array) assert array != dictionary_array assert array == dictionary_array_decode - assert array == dictionary_decode(array) - assert dictionary_encode(dictionary_array) == dictionary_array + assert array == pc.dictionary_decode(array) + assert pc.dictionary_encode(dictionary_array) == dictionary_array def test_cast(): @@ -2084,7 +2053,7 @@ def test_fsl_to_fsl_cast(value_type): # Different sized FSL cast_type = pa.list_(pa.field("element", value_type), 3) err_msg = 'Size of FixedSizeList is not the same.' - with pytest.raises(lib.ArrowTypeError, match=err_msg): + with pytest.raises(pa.lib.ArrowTypeError, match=err_msg): fsl.cast(cast_type) @@ -2282,28 +2251,28 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): def test_strptime(): arr = pa.array(["5/1/2020", None, "12/13/1900"]) - got = strptime(arr, format='%m/%d/%Y', unit='s') + got = pc.strptime(arr, format='%m/%d/%Y', unit='s') expected = pa.array( [datetime.datetime(2020, 5, 1), None, datetime.datetime(1900, 12, 13)], type=pa.timestamp('s')) assert got == expected # Positional format - assert strptime(arr, '%m/%d/%Y', unit='s') == got + assert pc.strptime(arr, '%m/%d/%Y', unit='s') == got expected = pa.array([datetime.datetime(2020, 1, 5), None, None], type=pa.timestamp('s')) - got = strptime(arr, format='%d/%m/%Y', unit='s', error_is_null=True) + got = pc.strptime(arr, format='%d/%m/%Y', unit='s', error_is_null=True) assert got == expected with pytest.raises(pa.ArrowInvalid, match="Failed to parse string: '5/1/2020'"): - strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=False) + pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=False) with pytest.raises(pa.ArrowInvalid, match="Failed to parse string: '5/1/2020'"): - strptime(arr, format='%Y-%m-%d', unit='s') + pc.strptime(arr, format='%Y-%m-%d', unit='s') - got = strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=True) + got = pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=True) assert got == pa.array([None, None, None], type=pa.timestamp('s')) @@ -2325,7 +2294,7 @@ def test_strftime(): tsa = pa.array(ts, type=pa.timestamp(unit, timezone)) for fmt in formats: options = pc.StrftimeOptions(fmt) - result = strftime(tsa, options=options) + result = pc.strftime(tsa, options=options) # cast to the same type as result to ignore string vs large_string expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) @@ -2334,34 +2303,34 @@ def test_strftime(): # Default format tsa = pa.array(ts, 
type=pa.timestamp("s", timezone)) - result = strftime(tsa, options=pc.StrftimeOptions()) + result = pc.strftime(tsa, options=pc.StrftimeOptions()) expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Default format plus timezone tsa = pa.array(ts, type=pa.timestamp("s", timezone)) - result = strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%S") - result = strftime(tsa, options=options) + result = pc.strftime(tsa, options=options) expected = pa.array(ts.strftime("%S")).cast(result.type) assert result.equals(expected) # Pandas %S.%f is equivalent to %S in arrow for unit="us" tsa = pa.array(ts, type=pa.timestamp("us", timezone)) options = pc.StrftimeOptions("%S") - result = strftime(tsa, options=options) + result = pc.strftime(tsa, options=options) expected = pa.array(ts.strftime("%S.%f")).cast(result.type) assert result.equals(expected) # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions(fmt, locale="C") - result = strftime(tsa, options=options) + result = pc.strftime(tsa, options=options) expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) @@ -2369,19 +2338,19 @@ def test_strftime(): fmt = "%Y-%m-%dT%H:%M:%S" ts = pd.to_datetime(times) tsa = pa.array(ts, type=pa.timestamp("s")) - result = strftime(tsa, options=pc.StrftimeOptions(fmt)) + result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) expected = pa.array(ts.strftime(fmt)).cast(result.type) # Positional format - assert strftime(tsa, fmt) == result + assert pc.strftime(tsa, fmt) == result assert result.equals(expected) with pytest.raises(pa.ArrowInvalid, match="Timezone not present, cannot convert to string"): - strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) with pytest.raises(pa.ArrowInvalid, match="Timezone not present, cannot convert to string"): - strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) + pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) def _check_datetime_components(timestamps, timezone=None): @@ -2429,42 +2398,42 @@ def _check_datetime_components(timestamps, timezone=None): microsecond = ts.dt.microsecond.astype("int64") nanosecond = ts.dt.nanosecond.astype("int64") - assert pc_year(tsa).equals(pa.array(year)) - assert pc_is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) - assert pc_month(tsa).equals(pa.array(month)) - assert pc_day(tsa).equals(pa.array(day)) - assert pc_day_of_week(tsa).equals(pa.array(dayofweek)) - assert pc_day_of_year(tsa).equals(pa.array(dayofyear)) - assert pc_iso_year(tsa).equals(pa.array(iso_year)) - assert pc_iso_week(tsa).equals(pa.array(iso_week)) - assert pc_iso_calendar(tsa).equals(iso_calendar) - assert pc_quarter(tsa).equals(pa.array(quarter)) - assert pc_hour(tsa).equals(pa.array(hour)) - assert pc_minute(tsa).equals(pa.array(minute)) - assert pc_second(tsa).equals(pa.array(second)) - assert pc_millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) - assert pc_microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) - assert pc_nanosecond(tsa).equals(pa.array(nanosecond)) - assert pc_subsecond(tsa).equals(pa.array(subseconds)) - assert 
pc_local_timestamp(tsa).equals(pa.array(ts.dt.tz_localize(None))) + assert pc.year(tsa).equals(pa.array(year)) + assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) + assert pc.month(tsa).equals(pa.array(month)) + assert pc.day(tsa).equals(pa.array(day)) + assert pc.day_of_week(tsa).equals(pa.array(dayofweek)) + assert pc.day_of_year(tsa).equals(pa.array(dayofyear)) + assert pc.iso_year(tsa).equals(pa.array(iso_year)) + assert pc.iso_week(tsa).equals(pa.array(iso_week)) + assert pc.iso_calendar(tsa).equals(iso_calendar) + assert pc.quarter(tsa).equals(pa.array(quarter)) + assert pc.hour(tsa).equals(pa.array(hour)) + assert pc.minute(tsa).equals(pa.array(minute)) + assert pc.second(tsa).equals(pa.array(second)) + assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) + assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) + assert pc.nanosecond(tsa).equals(pa.array(nanosecond)) + assert pc.subsecond(tsa).equals(pa.array(subseconds)) + assert pc.local_timestamp(tsa).equals(pa.array(ts.dt.tz_localize(None))) if ts.dt.tz: if ts.dt.tz is datetime.timezone.utc: # datetime with utc returns None for dst() - arr_is_dst = [False] * len(ts) + is_dst = [False] * len(ts) else: - arr_is_dst = ts.apply(lambda x: x.dst().seconds > 0) - assert pc_is_dst(tsa).equals(pa.array(arr_is_dst)) + is_dst = ts.apply(lambda x: x.dst().seconds > 0) + assert pc.is_dst(tsa).equals(pa.array(is_dst)) day_of_week_options = pc.DayOfWeekOptions( count_from_zero=False, week_start=1) - assert pc_day_of_week(tsa, options=day_of_week_options).equals( + assert pc.day_of_week(tsa, options=day_of_week_options).equals( pa.array(dayofweek + 1)) week_options = pc.WeekOptions( week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False) - assert pc_week(tsa, options=week_options).equals(pa.array(iso_week)) + assert pc.week(tsa, options=week_options).equals(pa.array(iso_week)) @pytest.mark.pandas @@ -2503,7 +2472,7 @@ def test_iso_calendar_longer_array(unit): # https://github.com/apache/arrow/issues/38655 # ensure correct result for array length > 32 arr = pa.array([datetime.datetime(2022, 1, 2, 9)]*50, pa.timestamp(unit)) - result = pc_iso_calendar(arr) + result = pc.iso_calendar(arr) expected = pa.StructArray.from_arrays( [[2021]*50, [52]*50, [7]*50], names=['iso_year', 'iso_week', 'iso_day_of_week'] @@ -2542,18 +2511,18 @@ def test_assume_timezone(): options = pc.AssumeTimezoneOptions(timezone) ta = pa.array(timestamps, type=ts_type) expected = timestamps.tz_localize(timezone) - result = pc_assume_timezone(ta, options=options) + result = pc.assume_timezone(ta, options=options) assert result.equals(pa.array(expected)) - result = pc_assume_timezone(ta, timezone) # Positional option + result = pc.assume_timezone(ta, timezone) # Positional option assert result.equals(pa.array(expected)) ta_zoned = pa.array(timestamps, type=pa.timestamp("ns", timezone)) with pytest.raises(pa.ArrowInvalid, match="already have a timezone:"): - pc_assume_timezone(ta_zoned, options=options) + pc.assume_timezone(ta_zoned, options=options) invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss") with pytest.raises(ValueError, match="not found in timezone database"): - pc_assume_timezone(ta, options=invalid_options) + pc.assume_timezone(ta, options=invalid_options) timezone = "Europe/Brussels" @@ -2566,18 +2535,18 @@ def test_assume_timezone(): with pytest.raises(ValueError, match="Timestamp doesn't exist in " f"timezone '{timezone}'"): - pc_assume_timezone(nonexistent_array, + 
pc.assume_timezone(nonexistent_array, options=options_nonexistent_raise) expected = pa.array(nonexistent.tz_localize( timezone, nonexistent="shift_forward")) - result = pc_assume_timezone( + result = pc.assume_timezone( nonexistent_array, options=options_nonexistent_latest) expected.equals(result) expected = pa.array(nonexistent.tz_localize( timezone, nonexistent="shift_backward")) - result = pc_assume_timezone( + result = pc.assume_timezone( nonexistent_array, options=options_nonexistent_earliest) expected.equals(result) @@ -2590,16 +2559,15 @@ def test_assume_timezone(): with pytest.raises(ValueError, match="Timestamp is ambiguous in " f"timezone '{timezone}'"): - pc_assume_timezone(ambiguous_array, options=options_ambiguous_raise) + pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) - expected = ambiguous.tz_localize(timezone, ambiguous=np.array([True, True, True])) - result = pc_assume_timezone( + expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True]) + result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) - expected = ambiguous.tz_localize( - timezone, ambiguous=np.array([False, False, False])) - result = pc_assume_timezone( + expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False]) + result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) @@ -2628,15 +2596,15 @@ def _check_temporal_rounding(ts, values, unit): frequency = str(value) + unit_shorthand[unit] options = pc.RoundTemporalOptions(value, unit) - result = ceil_temporal(ta, options=options).to_pandas() + result = pc.ceil_temporal(ta, options=options).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = floor_temporal(ta, options=options).to_pandas() + result = pc.floor_temporal(ta, options=options).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = round_temporal(ta, options=options).to_pandas() + result = pc.round_temporal(ta, options=options).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2649,29 +2617,29 @@ def _check_temporal_rounding(ts, values, unit): origin = ts.dt.floor(greater_unit[unit]) if ta.type.tz is None: - result = ceil_temporal(ta, options=options).to_pandas() + result = pc.ceil_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.ceil(frequency) + origin np.testing.assert_array_equal(result, expected) - result = floor_temporal(ta, options=options).to_pandas() + result = pc.floor_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.floor(frequency) + origin np.testing.assert_array_equal(result, expected) - result = round_temporal(ta, options=options).to_pandas() + result = pc.round_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.round(frequency) + origin np.testing.assert_array_equal(result, expected) # Check RoundTemporalOptions partial defaults if unit == "day": - result = ceil_temporal(ta, multiple=value).to_pandas() + result = pc.ceil_temporal(ta, multiple=value).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = floor_temporal(ta, multiple=value).to_pandas() + result = pc.floor_temporal(ta, multiple=value).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = round_temporal(ta, multiple=value).to_pandas() + result = 
pc.round_temporal(ta, multiple=value).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2682,7 +2650,7 @@ def _check_temporal_rounding(ts, values, unit): if ta.type.tz is None: options = pc.RoundTemporalOptions( value, unit, ceil_is_strictly_greater=True) - result = ceil_temporal(ta, options=options) + result = pc.ceil_temporal(ta, options=options) expected = ts.dt.ceil(frequency) expected = np.where( @@ -2695,15 +2663,15 @@ def _check_temporal_rounding(ts, values, unit): if unit == "day": frequency = "1D" - result = ceil_temporal(ta).to_pandas() + result = pc.ceil_temporal(ta).to_pandas() expected = ts.dt.ceil(frequency) np.testing.assert_array_equal(result, expected) - result = floor_temporal(ta).to_pandas() + result = pc.floor_temporal(ta).to_pandas() expected = ts.dt.floor(frequency) np.testing.assert_array_equal(result, expected) - result = round_temporal(ta).to_pandas() + result = pc.round_temporal(ta).to_pandas() expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) @@ -2741,15 +2709,15 @@ def test_round_temporal(unit): def test_count(): arr = pa.array([1, 2, 3, None, None]) - assert count(arr).as_py() == 3 - assert count(arr, mode='only_valid').as_py() == 3 - assert count(arr, mode='only_null').as_py() == 2 - assert count(arr, mode='all').as_py() == 5 - assert count(arr, 'all').as_py() == 5 + assert pc.count(arr).as_py() == 3 + assert pc.count(arr, mode='only_valid').as_py() == 3 + assert pc.count(arr, mode='only_null').as_py() == 2 + assert pc.count(arr, mode='all').as_py() == 5 + assert pc.count(arr, 'all').as_py() == 5 with pytest.raises(ValueError, match='"something else" is not a valid count mode'): - count(arr, 'something else') + pc.count(arr, 'something else') def test_index(): @@ -2791,15 +2759,15 @@ def test_partition_nth(): data = list(range(100, 140)) random.shuffle(data) pivot = 10 - indices = partition_nth_indices(data, pivot=pivot) + indices = pc.partition_nth_indices(data, pivot=pivot) check_partition_nth(data, indices, pivot, "at_end") # Positional pivot argument - assert partition_nth_indices(data, pivot) == indices + assert pc.partition_nth_indices(data, pivot) == indices with pytest.raises( ValueError, match="'partition_nth_indices' cannot be called without options"): - partition_nth_indices(data) + pc.partition_nth_indices(data) def test_partition_nth_null_placement(): @@ -2808,14 +2776,14 @@ def test_partition_nth_null_placement(): for pivot in (0, 7, 13, 19): for null_placement in ("at_start", "at_end"): - indices = partition_nth_indices(data, pivot=pivot, - null_placement=null_placement) + indices = pc.partition_nth_indices(data, pivot=pivot, + null_placement=null_placement) check_partition_nth(data, indices, pivot, null_placement) def test_select_k_array(): def validate_select_k(select_k_indices, arr, order, stable_sort=False): - sorted_indices = sort_indices(arr, sort_keys=[("dummy", order)]) + sorted_indices = pc.sort_indices(arr, sort_keys=[("dummy", order)]) head_k_indices = sorted_indices.slice(0, len(select_k_indices)) if stable_sort: assert select_k_indices == head_k_indices @@ -2827,7 +2795,7 @@ def validate_select_k(select_k_indices, arr, order, stable_sort=False): arr = pa.array([1, 2, None, 0]) for k in [0, 2, 4]: for order in ["descending", "ascending"]: - result = select_k_unstable( + result = pc.select_k_unstable( arr, k=k, sort_keys=[("dummy", order)]) validate_select_k(result, arr, order) @@ -2837,26 +2805,26 @@ def validate_select_k(select_k_indices, arr, order, 
stable_sort=False): result = pc.bottom_k_unstable(arr, k=k) validate_select_k(result, arr, "ascending") - result = select_k_unstable( + result = pc.select_k_unstable( arr, options=pc.SelectKOptions( k=2, sort_keys=[("dummy", "descending")]) ) validate_select_k(result, arr, "descending") - result = select_k_unstable( + result = pc.select_k_unstable( arr, options=pc.SelectKOptions(k=2, sort_keys=[("dummy", "ascending")]) ) validate_select_k(result, arr, "ascending") # Position options - assert select_k_unstable(arr, 2, - sort_keys=[("dummy", "ascending")]) == result - assert select_k_unstable(arr, 2, [("dummy", "ascending")]) == result + assert pc.select_k_unstable(arr, 2, + sort_keys=[("dummy", "ascending")]) == result + assert pc.select_k_unstable(arr, 2, [("dummy", "ascending")]) == result def test_select_k_table(): def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): - sorted_indices = sort_indices(tbl, sort_keys=sort_keys) + sorted_indices = pc.sort_indices(tbl, sort_keys=sort_keys) head_k_indices = sorted_indices.slice(0, len(select_k_indices)) if stable_sort: assert select_k_indices == head_k_indices @@ -2867,11 +2835,11 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): table = pa.table({"a": [1, 2, 0], "b": [1, 0, 1]}) for k in [0, 2, 4]: - result = select_k_unstable( + result = pc.select_k_unstable( table, k=k, sort_keys=[("a", "ascending")]) validate_select_k(result, table, sort_keys=[("a", "ascending")]) - result = select_k_unstable( + result = pc.select_k_unstable( table, k=k, sort_keys=[(pc.field("a"), "ascending"), ("b", "ascending")]) validate_select_k( result, table, sort_keys=[("a", "ascending"), ("b", "ascending")]) @@ -2886,65 +2854,65 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises( ValueError, match="'select_k_unstable' cannot be called without options"): - select_k_unstable(table) + pc.select_k_unstable(table) with pytest.raises(ValueError, match="select_k_unstable requires a nonnegative `k`"): - select_k_unstable(table, k=-1, sort_keys=[("a", "ascending")]) + pc.select_k_unstable(table, k=-1, sort_keys=[("a", "ascending")]) with pytest.raises(ValueError, match="select_k_unstable requires a " "non-empty `sort_keys`"): - select_k_unstable(table, k=2, sort_keys=[]) + pc.select_k_unstable(table, k=2, sort_keys=[]) with pytest.raises(ValueError, match="not a valid sort order"): - select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")]) + pc.select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")]) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) + pc.select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) def test_array_sort_indices(): arr = pa.array([1, 2, None, 0]) - result = array_sort_indices(arr) + result = pc.array_sort_indices(arr) assert result.to_pylist() == [3, 0, 1, 2] - result = array_sort_indices(arr, order="ascending") + result = pc.array_sort_indices(arr, order="ascending") assert result.to_pylist() == [3, 0, 1, 2] - result = array_sort_indices(arr, order="descending") + result = pc.array_sort_indices(arr, order="descending") assert result.to_pylist() == [1, 0, 3, 2] - result = array_sort_indices(arr, order="descending", - null_placement="at_start") + result = pc.array_sort_indices(arr, order="descending", + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] - result = array_sort_indices(arr, "descending", - 
null_placement="at_start") + result = pc.array_sort_indices(arr, "descending", + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="not a valid sort order"): - array_sort_indices(arr, order="nonscending") + pc.array_sort_indices(arr, order="nonscending") def test_sort_indices_array(): arr = pa.array([1, 2, None, 0]) - result = sort_indices(arr) + result = pc.sort_indices(arr) assert result.to_pylist() == [3, 0, 1, 2] - result = sort_indices(arr, sort_keys=[("dummy", "ascending")]) + result = pc.sort_indices(arr, sort_keys=[("dummy", "ascending")]) assert result.to_pylist() == [3, 0, 1, 2] - result = sort_indices(arr, sort_keys=[("dummy", "descending")]) + result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")]) assert result.to_pylist() == [1, 0, 3, 2] - result = sort_indices(arr, sort_keys=[("dummy", "descending")], - null_placement="at_start") + result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")], + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] # Positional `sort_keys` - result = sort_indices(arr, [("dummy", "descending")], - null_placement="at_start") + result = pc.sort_indices(arr, [("dummy", "descending")], + null_placement="at_start") assert result.to_pylist() == [2, 1, 0, 3] # Using SortOptions - result = sort_indices( + result = pc.sort_indices( arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")]) ) assert result.to_pylist() == [1, 0, 3, 2] - result = sort_indices( + result = pc.sort_indices( arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")], null_placement="at_start") ) @@ -2954,134 +2922,134 @@ def test_sort_indices_array(): def test_sort_indices_table(): table = pa.table({"a": [1, 1, None, 0], "b": [1, 0, 0, 1]}) - result = sort_indices(table, sort_keys=[("a", "ascending")]) + result = pc.sort_indices(table, sort_keys=[("a", "ascending")]) assert result.to_pylist() == [3, 0, 1, 2] - result = sort_indices(table, sort_keys=[(pc.field("a"), "ascending")], - null_placement="at_start") + result = pc.sort_indices(table, sort_keys=[(pc.field("a"), "ascending")], + null_placement="at_start") assert result.to_pylist() == [2, 3, 0, 1] - result = sort_indices( + result = pc.sort_indices( table, sort_keys=[("a", "descending"), ("b", "ascending")] ) assert result.to_pylist() == [1, 0, 3, 2] - result = sort_indices( + result = pc.sort_indices( table, sort_keys=[("a", "descending"), ("b", "ascending")], null_placement="at_start" ) assert result.to_pylist() == [2, 1, 0, 3] # Positional `sort_keys` - result = sort_indices( + result = pc.sort_indices( table, [("a", "descending"), ("b", "ascending")], null_placement="at_start" ) assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="Must specify one or more sort keys"): - sort_indices(table) + pc.sort_indices(table) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - sort_indices(table, sort_keys=[("unknown", "ascending")]) + pc.sort_indices(table, sort_keys=[("unknown", "ascending")]) with pytest.raises(ValueError, match="not a valid sort order"): - sort_indices(table, sort_keys=[("a", "nonscending")]) + pc.sort_indices(table, sort_keys=[("a", "nonscending")]) def test_is_in(): arr = pa.array([1, 2, None, 1, 2, 3]) - result = is_in(arr, value_set=pa.array([1, 3, None])) + result = pc.is_in(arr, value_set=pa.array([1, 3, None])) assert result.to_pylist() == [True, False, True, True, False, True] - result = is_in(arr, value_set=pa.array([1, 3, None]), 
skip_nulls=True) + result = pc.is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True) assert result.to_pylist() == [True, False, False, True, False, True] - result = is_in(arr, value_set=pa.array([1, 3])) + result = pc.is_in(arr, value_set=pa.array([1, 3])) assert result.to_pylist() == [True, False, False, True, False, True] - result = is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) + result = pc.is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [True, False, False, True, False, True] def test_index_in(): arr = pa.array([1, 2, None, 1, 2, 3]) - result = index_in(arr, value_set=pa.array([1, 3, None])) + result = pc.index_in(arr, value_set=pa.array([1, 3, None])) assert result.to_pylist() == [0, None, 2, 0, None, 1] - result = index_in(arr, value_set=pa.array([1, 3, None]), - skip_nulls=True) + result = pc.index_in(arr, value_set=pa.array([1, 3, None]), + skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] - result = index_in(arr, value_set=pa.array([1, 3])) + result = pc.index_in(arr, value_set=pa.array([1, 3])) assert result.to_pylist() == [0, None, None, 0, None, 1] - result = index_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) + result = pc.index_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] # Positional value_set - result = index_in(arr, pa.array([1, 3]), skip_nulls=True) + result = pc.index_in(arr, pa.array([1, 3]), skip_nulls=True) assert result.to_pylist() == [0, None, None, 0, None, 1] def test_quantile(): arr = pa.array([1, 2, 3, 4]) - result = quantile(arr) + result = pc.quantile(arr) assert result.to_pylist() == [2.5] - result = quantile(arr, interpolation='lower') + result = pc.quantile(arr, interpolation='lower') assert result.to_pylist() == [2] - result = quantile(arr, interpolation='higher') + result = pc.quantile(arr, interpolation='higher') assert result.to_pylist() == [3] - result = quantile(arr, interpolation='nearest') + result = pc.quantile(arr, interpolation='nearest') assert result.to_pylist() == [3] - result = quantile(arr, interpolation='midpoint') + result = pc.quantile(arr, interpolation='midpoint') assert result.to_pylist() == [2.5] - result = quantile(arr, interpolation='linear') + result = pc.quantile(arr, interpolation='linear') assert result.to_pylist() == [2.5] arr = pa.array([1, 2]) - result = quantile(arr, q=[0.25, 0.5, 0.75]) + result = pc.quantile(arr, q=[0.25, 0.5, 0.75]) assert result.to_pylist() == [1.25, 1.5, 1.75] - result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='lower') + result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='lower') assert result.to_pylist() == [1, 1, 1] - result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='higher') + result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='higher') assert result.to_pylist() == [2, 2, 2] - result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='midpoint') + result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='midpoint') assert result.to_pylist() == [1.5, 1.5, 1.5] - result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='nearest') + result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='nearest') assert result.to_pylist() == [1, 1, 2] - result = quantile(arr, q=[0.25, 0.5, 0.75], interpolation='linear') + result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='linear') assert result.to_pylist() == [1.25, 1.5, 1.75] # Positional `q` - result = quantile(arr, [0.25, 0.5, 0.75], interpolation='linear') + 
result = pc.quantile(arr, [0.25, 0.5, 0.75], interpolation='linear') assert result.to_pylist() == [1.25, 1.5, 1.75] with pytest.raises(ValueError, match="Quantile must be between 0 and 1"): - quantile(arr, q=1.1) + pc.quantile(arr, q=1.1) with pytest.raises(ValueError, match="not a valid quantile interpolation"): - quantile(arr, interpolation='zzz') + pc.quantile(arr, interpolation='zzz') def test_tdigest(): arr = pa.array([1, 2, 3, 4]) - result = tdigest(arr) + result = pc.tdigest(arr) assert result.to_pylist() == [2.5] arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])]) - result = tdigest(arr) + result = pc.tdigest(arr) assert result.to_pylist() == [2.5] arr = pa.array([1, 2, 3, 4]) - result = tdigest(arr, q=[0, 0.5, 1]) + result = pc.tdigest(arr, q=[0, 0.5, 1]) assert result.to_pylist() == [1, 2.5, 4] arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])]) - result = tdigest(arr, [0, 0.5, 1]) # positional `q` + result = pc.tdigest(arr, [0, 0.5, 1]) # positional `q` assert result.to_pylist() == [1, 2.5, 4] @@ -3097,32 +3065,32 @@ def test_min_max_element_wise(): arr2 = pa.array([3, 1, 2]) arr3 = pa.array([2, 3, None]) - result = max_element_wise(arr1, arr2) + result = pc.max_element_wise(arr1, arr2) assert result == pa.array([3, 2, 3]) - result = min_element_wise(arr1, arr2) + result = pc.min_element_wise(arr1, arr2) assert result == pa.array([1, 1, 2]) - result = max_element_wise(arr1, arr2, arr3) + result = pc.max_element_wise(arr1, arr2, arr3) assert result == pa.array([3, 3, 3]) - result = min_element_wise(arr1, arr2, arr3) + result = pc.min_element_wise(arr1, arr2, arr3) assert result == pa.array([1, 1, 2]) # with specifying the option - result = max_element_wise(arr1, arr3, skip_nulls=True) + result = pc.max_element_wise(arr1, arr3, skip_nulls=True) assert result == pa.array([2, 3, 3]) - result = min_element_wise(arr1, arr3, skip_nulls=True) + result = pc.min_element_wise(arr1, arr3, skip_nulls=True) assert result == pa.array([1, 2, 3]) - result = max_element_wise( + result = pc.max_element_wise( arr1, arr3, options=pc.ElementWiseAggregateOptions()) assert result == pa.array([2, 3, 3]) - result = min_element_wise( + result = pc.min_element_wise( arr1, arr3, options=pc.ElementWiseAggregateOptions()) assert result == pa.array([1, 2, 3]) # not skipping nulls - result = max_element_wise(arr1, arr3, skip_nulls=False) + result = pc.max_element_wise(arr1, arr3, skip_nulls=False) assert result == pa.array([2, 3, None]) - result = min_element_wise(arr1, arr3, skip_nulls=False) + result = pc.min_element_wise(arr1, arr3, skip_nulls=False) assert result == pa.array([1, 2, None]) @@ -3148,9 +3116,9 @@ def test_cumulative_sum(start, skip_nulls): if skip_nulls else pa.chunked_array([[0, None, None, None]]) ] for i, arr in enumerate(arrays): - result = cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing - expected = pc_add(expected_arrays[i], strt if strt is not None + expected = pc.add(expected_arrays[i], strt if strt is not None else 0) assert result.equals(expected) @@ -3169,16 +3137,16 @@ def test_cumulative_sum(start, skip_nulls): if skip_nulls else np.array([1, np.nan, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing - expected = 
pc_add(expected_arrays[i], strt if strt is not None + expected = pc.add(expected_arrays[i], strt if strt is not None else 0) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - cumulative_sum([1, 2, 3], start=strt) + pc.cumulative_sum([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3203,10 +3171,10 @@ def test_cumulative_prod(start, skip_nulls): if skip_nulls else pa.chunked_array([[1, None, None, None]]) ] for i, arr in enumerate(arrays): - result = cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) # Multiply `start` offset to expected array before comparing - expected = multiply(expected_arrays[i], strt if strt is not None - else 1) + expected = pc.multiply(expected_arrays[i], strt if strt is not None + else 1) assert result.equals(expected) starts = [None, start, pa.scalar(start, type=pa.float32()), @@ -3224,16 +3192,16 @@ def test_cumulative_prod(start, skip_nulls): if skip_nulls else np.array([1, np.nan, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls) # Multiply `start` offset to expected array before comparing - expected = multiply(expected_arrays[i], strt if strt is not None - else 1) + expected = pc.multiply(expected_arrays[i], strt if strt is not None + else 1) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - cumulative_prod([1, 2, 3], start=strt) + pc.cumulative_prod([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3259,9 +3227,9 @@ def test_cumulative_max(start, skip_nulls): pa.chunked_array([[2, 2, None, None, None, None]]) ] for i, arr in enumerate(arrays): - result = cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) # Max `start` offset with expected array before comparing - expected = max_element_wise( + expected = pc.max_element_wise( expected_arrays[i], strt if strt is not None else int(-1e9), skip_nulls=False) assert result.equals(expected) @@ -3281,9 +3249,9 @@ def test_cumulative_max(start, skip_nulls): if skip_nulls else np.array([2.5, 2.5, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) # Max `start` offset with expected array before comparing - expected = max_element_wise( + expected = pc.max_element_wise( expected_arrays[i], strt if strt is not None else -1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( @@ -3291,7 +3259,7 @@ def test_cumulative_max(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - cumulative_max([1, 2, 3], start=strt) + pc.cumulative_max([1, 2, 3], start=strt) @pytest.mark.numpy @@ -3317,9 +3285,9 @@ def test_cumulative_min(start, skip_nulls): pa.chunked_array([[5, 5, None, None, None, None]]) ] for i, arr in enumerate(arrays): - result = cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) # Min `start` offset with expected array before comparing - expected = 
min_element_wise( + expected = pc.min_element_wise( expected_arrays[i], strt if strt is not None else int(1e9), skip_nulls=False) assert result.equals(expected) @@ -3339,9 +3307,9 @@ def test_cumulative_min(start, skip_nulls): if skip_nulls else np.array([5.5, 5.5, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) # Min `start` offset with expected array before comparing - expected = min_element_wise( + expected = pc.min_element_wise( expected_arrays[i], strt if strt is not None else 1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( @@ -3349,26 +3317,26 @@ def test_cumulative_min(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - cumulative_max([1, 2, 3], start=strt) + pc.cumulative_max([1, 2, 3], start=strt) def test_make_struct(): - assert make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'} + assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'} - assert make_struct(1, 'a', field_names=['i', 's']).as_py() == { + assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == { 'i': 1, 's': 'a'} - assert make_struct([1, 2, 3], - "a b c".split()) == pa.StructArray.from_arrays([ - [1, 2, 3], - "a b c".split()], names='0 1'.split()) + assert pc.make_struct([1, 2, 3], + "a b c".split()) == pa.StructArray.from_arrays([ + [1, 2, 3], + "a b c".split()], names='0 1'.split()) with pytest.raises(ValueError, match="Array arguments must all be the same length"): - make_struct([1, 2, 3, 4], "a b c".split()) + pc.make_struct([1, 2, 3, 4], "a b c".split()) with pytest.raises(ValueError, match="0 arguments but 2 field names"): - make_struct(field_names=['one', 'two']) + pc.make_struct(field_names=['one', 'two']) def test_map_lookup(): @@ -3380,12 +3348,12 @@ def test_map_lookup(): result_all = pa.array([[1], None, None, [5, 7], None], type=pa.list_(pa.int32())) - assert map_lookup(arr, 'one', 'first') == result_first - assert map_lookup(arr, pa.scalar( + assert pc.map_lookup(arr, 'one', 'first') == result_first + assert pc.map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'first') == result_first - assert map_lookup(arr, pa.scalar( + assert pc.map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'last') == result_last - assert map_lookup(arr, pa.scalar( + assert pc.map_lookup(arr, pa.scalar( 'one', type=pa.utf8()), 'all') == result_all @@ -3395,42 +3363,42 @@ def test_struct_fields_options(): c = pa.StructArray.from_arrays([a, b], ["a", "b"]) arr = pa.StructArray.from_arrays([a, c], ["a", "c"]) - assert struct_field(arr, '.c.b') == b - assert struct_field(arr, b'.c.b') == b - assert struct_field(arr, ['c', 'b']) == b - assert struct_field(arr, [1, 'b']) == b - assert struct_field(arr, (b'c', 'b')) == b - assert struct_field(arr, pc.field(('c', 'b'))) == b + assert pc.struct_field(arr, '.c.b') == b + assert pc.struct_field(arr, b'.c.b') == b + assert pc.struct_field(arr, ['c', 'b']) == b + assert pc.struct_field(arr, [1, 'b']) == b + assert pc.struct_field(arr, (b'c', 'b')) == b + assert pc.struct_field(arr, pc.field(('c', 'b'))) == b - assert struct_field(arr, '.a') == a - assert struct_field(arr, ['a']) == a - assert struct_field(arr, 'a') == a - assert struct_field(arr, pc.field(('a',))) == a + assert pc.struct_field(arr, '.a') == a + assert pc.struct_field(arr, ['a']) == a + assert pc.struct_field(arr, 'a') == a + assert pc.struct_field(arr, pc.field(('a',))) == a - assert 
struct_field(arr, indices=[1, 1]) == b - assert struct_field(arr, (1, 1)) == b - assert struct_field(arr, [0]) == a - assert struct_field(arr, []) == arr + assert pc.struct_field(arr, indices=[1, 1]) == b + assert pc.struct_field(arr, (1, 1)) == b + assert pc.struct_field(arr, [0]) == a + assert pc.struct_field(arr, []) == arr with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - struct_field(arr, 'foo') + pc.struct_field(arr, 'foo') with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - struct_field(arr, '.c.foo') + pc.struct_field(arr, '.c.foo') # drill into a non-struct array and continue to ask for a field with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"): - struct_field(arr, '.a.foo') + pc.struct_field(arr, '.a.foo') # TODO: https://issues.apache.org/jira/browse/ARROW-14853 - # assert struct_field(arr) == arr + # assert pc.struct_field(arr) == arr def test_case_when(): - assert case_when(make_struct([True, False, None], - [False, True, None]), - [1, 2, 3], - [11, 12, 13]) == pa.array([1, 12, None]) + assert pc.case_when(pc.make_struct([True, False, None], + [False, True, None]), + [1, 2, 3], + [11, 12, 13]) == pa.array([1, 12, None]) def test_list_element(): @@ -3441,12 +3409,12 @@ def test_list_element(): lists = pa.array([l1, l2], list_type) index = 1 - result = list_element(lists, index) + result = pa.compute.list_element(lists, index) expected = pa.array([None, {'a': 0.52, 'b': 3}], element_type) assert result.equals(expected) index = 4 - result = list_element(lists, index) + result = pa.compute.list_element(lists, index) expected = pa.array([{'a': 5.6, 'b': 6}, {'a': .6, 'b': 8}], element_type) assert result.equals(expected) @@ -3454,28 +3422,28 @@ def test_list_element(): def test_count_distinct(): samples = [datetime.datetime(year=y, month=1, day=1) for y in range(1992, 2092)] arr = pa.array(samples, pa.timestamp("ns")) - assert count_distinct(arr) == pa.scalar(len(samples), type=pa.int64()) + assert pc.count_distinct(arr) == pa.scalar(len(samples), type=pa.int64()) def test_count_distinct_options(): arr = pa.array([1, 2, 3, None, None]) - assert count_distinct(arr).as_py() == 3 - assert count_distinct(arr, mode='only_valid').as_py() == 3 - assert count_distinct(arr, mode='only_null').as_py() == 1 - assert count_distinct(arr, mode='all').as_py() == 4 - assert count_distinct(arr, 'all').as_py() == 4 + assert pc.count_distinct(arr).as_py() == 3 + assert pc.count_distinct(arr, mode='only_valid').as_py() == 3 + assert pc.count_distinct(arr, mode='only_null').as_py() == 1 + assert pc.count_distinct(arr, mode='all').as_py() == 4 + assert pc.count_distinct(arr, 'all').as_py() == 4 def test_utf8_normalize(): arr = pa.array(["01²3"]) - assert utf8_normalize(arr, form="NFC") == arr - assert utf8_normalize(arr, form="NFKC") == pa.array(["0123"]) - assert utf8_normalize(arr, "NFD") == arr - assert utf8_normalize(arr, "NFKD") == pa.array(["0123"]) + assert pc.utf8_normalize(arr, form="NFC") == arr + assert pc.utf8_normalize(arr, form="NFKC") == pa.array(["0123"]) + assert pc.utf8_normalize(arr, "NFD") == arr + assert pc.utf8_normalize(arr, "NFKD") == pa.array(["0123"]) with pytest.raises( ValueError, match='"NFZ" is not a valid Unicode normalization form'): - utf8_normalize(arr, form="NFZ") + pc.utf8_normalize(arr, form="NFZ") def test_random(): @@ -3517,7 +3485,7 @@ def test_rank_options_tiebreaker(tiebreaker, expected_values): rank_options = pc.RankOptions(sort_keys="ascending", null_placement="at_end", tiebreaker=tiebreaker) - result = 
rank(arr, options=rank_options) + result = pc.rank(arr, options=rank_options) expected = pa.array(expected_values, type=pa.uint64()) assert result.equals(expected) @@ -3527,24 +3495,24 @@ def test_rank_options(): expected = pa.array([3, 1, 4, 6, 5, 7, 2], type=pa.uint64()) # Ensure rank can be called without specifying options - result = rank(arr) + result = pc.rank(arr) assert result.equals(expected) # Ensure default RankOptions - result = rank(arr, options=pc.RankOptions()) + result = pc.rank(arr, options=pc.RankOptions()) assert result.equals(expected) # Ensure sort_keys tuple usage - result = rank(arr, options=pc.RankOptions( + result = pc.rank(arr, options=pc.RankOptions( sort_keys=[("b", "ascending")]) ) assert result.equals(expected) - result = rank(arr, null_placement="at_start") + result = pc.rank(arr, null_placement="at_start") expected_at_start = pa.array([5, 3, 6, 1, 7, 2, 4], type=pa.uint64()) assert result.equals(expected_at_start) - result = rank(arr, sort_keys="descending") + result = pc.rank(arr, sort_keys="descending") expected_descending = pa.array([3, 4, 1, 6, 2, 7, 5], type=pa.uint64()) assert result.equals(expected_descending) @@ -3560,29 +3528,29 @@ def test_rank_quantile_options(): expected = pa.array([0.7, 0.1, 0.7, 0.3, 0.7], type=pa.float64()) # Ensure rank_quantile can be called without specifying options - result = rank_quantile(arr) + result = pc.rank_quantile(arr) assert result.equals(expected) # Ensure default RankOptions - result = rank_quantile(arr, options=pc.RankQuantileOptions()) + result = pc.rank_quantile(arr, options=pc.RankQuantileOptions()) assert result.equals(expected) # Ensure sort_keys tuple usage - result = rank_quantile(arr, options=pc.RankQuantileOptions( + result = pc.rank_quantile(arr, options=pc.RankQuantileOptions( sort_keys=[("b", "ascending")]) ) assert result.equals(expected) - result = rank_quantile(arr, null_placement="at_start") + result = pc.rank_quantile(arr, null_placement="at_start") expected_at_start = pa.array([0.3, 0.7, 0.3, 0.9, 0.3], type=pa.float64()) assert result.equals(expected_at_start) - result = rank_quantile(arr, sort_keys="descending") + result = pc.rank_quantile(arr, sort_keys="descending") expected_descending = pa.array([0.7, 0.3, 0.7, 0.1, 0.7], type=pa.float64()) assert result.equals(expected_descending) with pytest.raises(ValueError, match="not a valid sort order"): - rank_quantile(arr, sort_keys="XXX") + pc.rank_quantile(arr, sort_keys="XXX") def test_rank_normal_options(): @@ -3591,21 +3559,21 @@ def test_rank_normal_options(): expected = pytest.approx( [0.5244005127080407, -1.2815515655446004, 0.5244005127080407, -0.5244005127080409, 0.5244005127080407]) - result = rank_normal(arr) + result = pc.rank_normal(arr) assert result.to_pylist() == expected - result = rank_normal(arr, null_placement="at_end", sort_keys="ascending") + result = pc.rank_normal(arr, null_placement="at_end", sort_keys="ascending") assert result.to_pylist() == expected - result = rank_normal(arr, options=pc.RankQuantileOptions()) + result = pc.rank_normal(arr, options=pc.RankQuantileOptions()) assert result.to_pylist() == expected expected = pytest.approx( [-0.5244005127080409, 1.2815515655446004, -0.5244005127080409, 0.5244005127080407, -0.5244005127080409]) - result = rank_normal(arr, null_placement="at_start", sort_keys="descending") + result = pc.rank_normal(arr, null_placement="at_start", sort_keys="descending") assert result.to_pylist() == expected - result = rank_normal(arr, - 
options=pc.RankQuantileOptions(null_placement="at_start", - sort_keys="descending")) + result = pc.rank_normal(arr, + options=pc.RankQuantileOptions(null_placement="at_start", + sort_keys="descending")) assert result.to_pylist() == expected @@ -3633,17 +3601,17 @@ def create_sample_expressions(): # These expressions include at least one function call exprs_with_call = [a == b, a != b, a > b, c & j, c | j, ~c, d.is_valid(), - a + b, a - b, a * b, a / b, negate(a), - pc_add(a, b), subtract(a, b), divide(a, b), - multiply(a, b), power(a, a), sqrt(a), - exp(b), cos(b), sin(b), tan(b), - acos(b), atan(b), asin(b), atan2(b, b), - sinh(a), cosh(a), tanh(a), - asinh(a), acosh(b), atanh(k), - pc_abs(b), sign(a), bit_wise_not(a), - bit_wise_and(a, a), bit_wise_or(a, a), - bit_wise_xor(a, a), is_nan(b), is_finite(b), - coalesce(a, b), + a + b, a - b, a * b, a / b, pc.negate(a), + pc.add(a, b), pc.subtract(a, b), pc.divide(a, b), + pc.multiply(a, b), pc.power(a, a), pc.sqrt(a), + pc.exp(b), pc.cos(b), pc.sin(b), pc.tan(b), + pc.acos(b), pc.atan(b), pc.asin(b), pc.atan2(b, b), + pc.sinh(a), pc.cosh(a), pc.tanh(a), + pc.asinh(a), pc.acosh(b), pc.atanh(k), + pc.abs(b), pc.sign(a), pc.bit_wise_not(a), + pc.bit_wise_and(a, a), pc.bit_wise_or(a, a), + pc.bit_wise_xor(a, a), pc.is_nan(b), pc.is_finite(b), + pc.coalesce(a, b), a.cast(pa.int32(), safe=False)] # These expressions test out various reference styles and may include function @@ -3807,29 +3775,29 @@ def test_expression_call_function(): field = pc.field("field") # no options - assert str(hour(field)) == "hour(field)" + assert str(pc.hour(field)) == "hour(field)" # default options - assert str(pc_round(field)) == "round(field)" + assert str(pc.round(field)) == "round(field)" # specified options - assert str(pc_round(field, ndigits=1)) == \ + assert str(pc.round(field, ndigits=1)) == \ "round(field, {ndigits=1, round_mode=HALF_TO_EVEN})" # Will convert non-expression arguments if possible - assert str(pc_add(field, 1)) == "add(field, 1)" - assert str(pc_add(field, pa.scalar(1))) == "add(field, 1)" + assert str(pc.add(field, 1)) == "add(field, 1)" + assert str(pc.add(field, pa.scalar(1))) == "add(field, 1)" # Invalid pc.scalar input gives original error message msg = "only other expressions allowed as arguments" with pytest.raises(TypeError, match=msg): - pc_add(field, object) + pc.add(field, object) def test_cast_table_raises(): table = pa.table({'a': [1, 2]}) - with pytest.raises(lib.ArrowTypeError): - cast(table, pa.int64()) + with pytest.raises(pa.lib.ArrowTypeError): + pc.cast(table, pa.int64()) @pytest.mark.parametrize("start,stop,expected", ( @@ -3856,9 +3824,9 @@ def test_list_slice_output_fixed(start, stop, step, expected, value_type, msg = ("Unable to produce FixedSizeListArray from " "non-FixedSizeListArray without `stop` being set.") with pytest.raises(pa.ArrowInvalid, match=msg): - list_slice(*args) + pc.list_slice(*args) else: - result = list_slice(*args) + result = pc.list_slice(*args) pylist = result.cast(pa.list_(pa.int8(), result.type.list_size)).to_pylist() assert pylist == [e[::step] if e else e for e in expected] @@ -3889,8 +3857,8 @@ def test_list_slice_output_variable(start, stop, step, value_type, list_type): if list_type == "fixed": list_type = pa.list_ # non fixed output type - result = list_slice(arr, start, stop, step, - return_fixed_size_list=False) + result = pc.list_slice(arr, start, stop, step, + return_fixed_size_list=False) assert result.type == list_type(value_type()) pylist = 
result.cast(pa.list_(pa.int8())).to_pylist() @@ -3907,7 +3875,7 @@ def test_list_slice_output_variable(start, stop, step, value_type, list_type): lambda: pa.large_list(pa.field('col', pa.int8())))) def test_list_slice_field_names_retained(return_fixed_size, type): arr = pa.array([[1]], type()) - out = list_slice(arr, 0, 1, return_fixed_size_list=return_fixed_size) + out = pc.list_slice(arr, 0, 1, return_fixed_size_list=return_fixed_size) assert arr.type.field(0).name == out.type.field(0).name # Verify out type matches in type if return_fixed_size_list==None @@ -3919,27 +3887,27 @@ def test_list_slice_bad_parameters(): arr = pa.array([[1]], pa.list_(pa.int8(), 1)) msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)" with pytest.raises(pa.ArrowInvalid, match=msg): - list_slice(arr, -1, 1) # negative start? + pc.list_slice(arr, -1, 1) # negative start? with pytest.raises(pa.ArrowInvalid, match=msg): - list_slice(arr, 2, 1) # start > stop? + pc.list_slice(arr, 2, 1) # start > stop? # TODO(ARROW-18281): start==stop -> empty lists with pytest.raises(pa.ArrowInvalid, match=msg): - list_slice(arr, 0, 0) # start == stop? + pc.list_slice(arr, 0, 0) # start == stop? # Step not >= 1 msg = "`step` must be >= 1, got: " with pytest.raises(pa.ArrowInvalid, match=msg + "0"): - list_slice(arr, 0, 1, step=0) + pc.list_slice(arr, 0, 1, step=0) with pytest.raises(pa.ArrowInvalid, match=msg + "-1"): - list_slice(arr, 0, 1, step=-1) + pc.list_slice(arr, 0, 1, step=-1) def check_run_end_encode_decode(value_type, run_end_encode_opts=None): values = [1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3] arr = pa.array(values, type=value_type) - encoded = run_end_encode(arr, options=run_end_encode_opts) - decoded = run_end_decode(encoded) + encoded = pc.run_end_encode(arr, options=run_end_encode_opts) + decoded = pc.run_end_decode(encoded) assert decoded.type == arr.type assert decoded.equals(arr) @@ -3976,65 +3944,65 @@ def test_run_end_encode(value_type, option): def test_pairwise_diff(): arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, 1, 1, None, None, 1]) - result = pairwise_diff(arr, period=1) + result = pa.compute.pairwise_diff(arr, period=1) assert result.equals(expected) arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, None, 2, None, 1, None]) - result = pairwise_diff(arr, period=2) + result = pa.compute.pairwise_diff(arr, period=2) assert result.equals(expected) # negative period arr = pa.array([1, 2, 3, None, 4, 5], type=pa.int8()) expected = pa.array([-1, -1, None, None, -1, None], type=pa.int8()) - result = pairwise_diff(arr, period=-1) + result = pa.compute.pairwise_diff(arr, period=-1) assert result.equals(expected) # wrap around overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) expected = pa.array([255, 255, None, None, 255, None], type=pa.uint8()) - result = pairwise_diff(arr, period=-1) + result = pa.compute.pairwise_diff(arr, period=-1) assert result.equals(expected) # fail on overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) with pytest.raises(pa.ArrowInvalid, match="overflow"): - pairwise_diff_checked(arr, period=-1) + pa.compute.pairwise_diff_checked(arr, period=-1) def test_pivot_wider(): key_names = ["width", "height"] - result = pivot_wider(["height", "width", "depth"], [10, None, 11]) + result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11]) assert result.as_py() == {} - result = pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names) + result = pc.pivot_wider(["height", "width", 
"depth"], [10, None, 11], + key_names) assert result.as_py() == {"width": None, "height": 10} # check key order assert list(result.as_py()) == ["width", "height"] - result = pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names=key_names) + result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11], + key_names=key_names) assert result.as_py() == {"width": None, "height": 10} with pytest.raises(KeyError, match="Unexpected pivot key: depth"): - result = pivot_wider(["height", "width", "depth"], [10, None, 11], - key_names=key_names, - unexpected_key_behavior="raise") + result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11], + key_names=key_names, + unexpected_key_behavior="raise") with pytest.raises(ValueError, match="Encountered more than one non-null value"): - result = pivot_wider(["height", "width", "height"], [10, None, 11], - key_names=key_names) + result = pc.pivot_wider(["height", "width", "height"], [10, None, 11], + key_names=key_names) def test_winsorize(): arr = pa.array([10, 4, 9, 8, 5, 3, 7, 2, 1, 6]) - result = winsorize(arr, 0.1, 0.8) + result = pc.winsorize(arr, 0.1, 0.8) assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6] - result = winsorize( + result = pc.winsorize( arr, options=pc.WinsorizeOptions(lower_limit=0.1, upper_limit=0.8)) assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6] diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index a420af18864..07286125c4c 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - pass + np = None from pyarrow.pandas_compat import _pandas_api # noqa import pyarrow as pa diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index 359ef62b1f8..7508d8f0b98 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -20,7 +20,7 @@ import pytest -from pyarrow._pyarrow_cpp_tests import get_cpp_tests # type: ignore[unresolved_import] +from pyarrow._pyarrow_cpp_tests import get_cpp_tests def inject_cpp_tests(ns): diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 71c96835d2c..2794d07e87c 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1502,7 +1502,7 @@ def signal_from_thread(): # Interruption should have arrived timely assert last_duration <= 2.0 - e = exc_info.__context__ # type: ignore[possibly-unbound-attribute] + e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signal.SIGINT @@ -1989,8 +1989,7 @@ def test_write_quoting_style(): buf = io.BytesIO() for write_options, res in [ (WriteOptions(quoting_style='needed'), b'"c1"\n","\n""""\n'), - (WriteOptions(quoting_style='none'), pa.lib.ArrowInvalid), \ - # type: ignore[unresolved-attribute] + (WriteOptions(quoting_style='none'), pa.lib.ArrowInvalid), ]: with CSVWriter(buf, t.schema, write_options=write_options) as writer: try: diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index 1ca5a9529e4..e06f479987c 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -42,8 +42,8 @@ not has_ipc_support, reason='CUDA IPC not supported in platform `%s`' % (platform)) -global_context = cuda.Context(0) # for flake8 -global_context1 = cuda.Context(0) # for flake8 +global_context = None # for flake8 
+global_context1 = None # for flake8 def setup_module(module): @@ -807,9 +807,8 @@ def test_create_table_with_device_buffers(): def other_process_for_test_IPC(handle_buffer, expected_arr): - other_context = pa.cuda.Context(0) # type: ignore[unresolved-attribute] - ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer) \ - # type: ignore[unresolved-attribute] + other_context = pa.cuda.Context(0) + ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer) ipc_buf = other_context.open_ipc_buffer(ipc_handle) ipc_buf.context.synchronize() buf = ipc_buf.copy_to_host() diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py index cfcf6673755..876f3c7f761 100644 --- a/python/pyarrow/tests/test_cuda_numba_interop.py +++ b/python/pyarrow/tests/test_cuda_numba_interop.py @@ -26,11 +26,10 @@ cuda = pytest.importorskip("pyarrow.cuda") nb_cuda = pytest.importorskip("numba.cuda") -from numba.cuda.cudadrv.devicearray import DeviceNDArray \ - # type: ignore[unresolved_import] # noqa: E402 +from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402 -context_choices = {} +context_choices = None context_choice_ids = ['pyarrow.cuda', 'numba.cuda'] @@ -50,7 +49,7 @@ def teardown_module(module): @pytest.mark.parametrize("c", range(len(context_choice_ids)), ids=context_choice_ids) def test_context(c): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] assert ctx.handle == nb_ctx.handle.value assert ctx.handle == ctx.to_numba().handle.value ctx2 = cuda.Context.from_numba(nb_ctx) @@ -73,8 +72,7 @@ def make_random_buffer(size, target='host', dtype='uint8', ctx=None): return arr, buf elif target == 'device': arr, buf = make_random_buffer(size, target='host', dtype=dtype) - dbuf = ctx.new_buffer(size * dtype.itemsize) \ - # type: ignore[possibly-unbound-attribute] + dbuf = ctx.new_buffer(size * dtype.itemsize) dbuf.copy_from_host(buf, position=0, nbytes=buf.size) return arr, dbuf raise ValueError('invalid target value') @@ -85,7 +83,7 @@ def make_random_buffer(size, target='host', dtype='uint8', ctx=None): @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) @pytest.mark.parametrize("size", [0, 1, 8, 1000]) def test_from_object(c, dtype, size): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx) # Creating device buffer from numba DeviceNDArray: @@ -163,7 +161,7 @@ def __cuda_array_interface__(self): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_numba_memalloc(c, dtype): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] dtype = np.dtype(dtype) # Allocate memory using numba context # Warning: this will not be reflected in pyarrow context manager @@ -186,7 +184,7 @@ def test_numba_memalloc(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_pyarrow_memalloc(c, dtype): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] size = 10 arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx) @@ -200,7 +198,7 @@ def test_pyarrow_memalloc(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_numba_context(c, dtype): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] size = 10 with nb_cuda.gpus[0]: arr, cbuf = make_random_buffer(size, target='device', 
@@ -219,7 +217,7 @@ def test_numba_context(c, dtype): ids=context_choice_ids) @pytest.mark.parametrize("dtype", dtypes, ids=dtypes) def test_pyarrow_jit(c, dtype): - ctx, nb_ctx = context_choices.get(c, (None, None)) + ctx, nb_ctx = context_choices[c] @nb_cuda.jit def increment_by_one(an_array): diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index c9c35087839..e0116a4bb76 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -191,7 +191,7 @@ def test_visit_strings(tmpdir): strings = ['a', 'b', 'c'] visited = [] - mod._visit_strings(strings, visited.append) # type: ignore[unresolved-attribute] + mod._visit_strings(strings, visited.append) assert visited == strings @@ -200,4 +200,4 @@ def raise_on_b(s): if s == 'b': raise ValueError('wtf') - mod._visit_strings(strings, raise_on_b) # type: ignore[unresolved-attribute] + mod._visit_strings(strings, raise_on_b) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 344201ff4f9..e7365643b84 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -32,7 +32,7 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -41,10 +41,7 @@ import pyarrow.feather import pyarrow.fs as fs import pyarrow.json -from pyarrow import lib # type: ignore[unresolved-attribute] -from pyarrow.compute import (is_in, hour, days_between, sort_indices, unique) \ - # type: ignore[unresolved-attribute] -from pyarrow.lib import is_threading_enabled # type: ignore[unresolved_import] +from pyarrow.lib import is_threading_enabled from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, _configure_s3_limited_user, _filesystem_uri, change_cwd) @@ -52,27 +49,17 @@ try: import pandas as pd except ImportError: - pass + pd = None try: import pyarrow.dataset as ds - from pyarrow.dataset import ParquetFragmentScanOptions, ParquetReadOptions, \ - ParquetFileFragment, ParquetFileFormat # type: ignore[possibly-unbound-attribute] except ImportError: - pass + ds = None try: - from pyarrow.dataset import ( - OrcFileFormat # type: ignore[possibly-unbound-import] - ) -except ImportError: - pass - -try: - import pyarrow.parquet as pq \ - # type: ignore[unresolved-import] + import pyarrow.parquet as pq except ImportError: - pass + pq = None # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not dataset' @@ -283,7 +270,7 @@ def multisourcefs(request): @pytest.fixture def dataset(mockfs): - format = ParquetFileFormat() + format = ds.ParquetFileFormat() selector = fs.FileSelector('subdir', recursive=True) options = ds.FileSystemFactoryOptions('subdir') options.partitioning = ds.DirectoryPartitioning( @@ -351,7 +338,7 @@ def test_filesystem_dataset(mockfs): schema = pa.schema([ pa.field('const', pa.int64()) ]) - file_format = ParquetFileFormat() + file_format = ds.ParquetFileFormat() paths = ['subdir/1/xxx/file0.parquet', 'subdir/2/yyy/file1.parquet'] partitions = [ds.field('part') == x for x in range(1, 3)] fragments = [file_format.make_fragment(path, mockfs, part) @@ -369,7 +356,7 @@ def test_filesystem_dataset(mockfs): for dataset in [dataset_from_fragments, dataset_from_paths]: assert isinstance(dataset, ds.FileSystemDataset) - assert isinstance(dataset.format, ParquetFileFormat) + assert isinstance(dataset.format, ds.ParquetFileFormat) assert dataset.partition_expression.equals(root_partition) assert set(dataset.files) == set(paths) @@ -377,14 +364,14 @@ def test_filesystem_dataset(mockfs): for fragment, partition, path in zip(fragments, partitions, paths): assert fragment.partition_expression.equals(partition) assert fragment.path == path - assert isinstance(fragment.format, ParquetFileFormat) - assert isinstance(fragment, ParquetFileFragment) + assert isinstance(fragment.format, ds.ParquetFileFormat) + assert isinstance(fragment, ds.ParquetFileFragment) assert fragment.row_groups == [0] assert fragment.num_row_groups == 1 row_group_fragments = list(fragment.split_by_row_group()) assert fragment.num_row_groups == len(row_group_fragments) == 1 - assert isinstance(row_group_fragments[0], ParquetFileFragment) + assert isinstance(row_group_fragments[0], ds.ParquetFileFragment) assert row_group_fragments[0].path == path assert row_group_fragments[0].row_groups == [0] assert row_group_fragments[0].num_row_groups == 1 @@ -503,7 +490,7 @@ def test_dataset(dataset, dataset_reader): def test_dataset_factory_inspect_schema_promotion(promotable_mockfs): mockfs, path1, path2 = promotable_mockfs factory = ds.FileSystemDatasetFactory( - mockfs, [path1, path2], ParquetFileFormat() + mockfs, [path1, path2], ds.ParquetFileFormat() ) with pytest.raises( @@ -547,7 +534,7 @@ def test_dataset_factory_inspect_schema_promotion(promotable_mockfs): def test_dataset_factory_inspect_bad_params(promotable_mockfs): mockfs, path1, path2 = promotable_mockfs factory = ds.FileSystemDatasetFactory( - mockfs, [path1, path2], ParquetFileFormat() + mockfs, [path1, path2], ds.ParquetFileFormat() ) with pytest.raises(ValueError, match='Invalid promote_options: bad_option'): @@ -955,11 +942,11 @@ def test_partition_keys(): @pytest.mark.parquet def test_parquet_read_options(): - opts1 = ParquetReadOptions() - opts2 = ParquetReadOptions(dictionary_columns=['a', 'b']) - opts3 = ParquetReadOptions(coerce_int96_timestamp_unit="ms") - opts4 = ParquetReadOptions(binary_type=pa.binary_view()) - opts5 = ParquetReadOptions(list_type=pa.LargeListType) + opts1 = ds.ParquetReadOptions() + opts2 = ds.ParquetReadOptions(dictionary_columns=['a', 'b']) + opts3 = ds.ParquetReadOptions(coerce_int96_timestamp_unit="ms") + opts4 = ds.ParquetReadOptions(binary_type=pa.binary_view()) + opts5 = ds.ParquetReadOptions(list_type=pa.LargeListType) assert opts1.dictionary_columns == set() @@ -997,37 +984,37 @@ def test_parquet_read_options(): @pytest.mark.parquet def test_parquet_file_format_read_options(): - pff1 = ParquetFileFormat() - 
pff2 = ParquetFileFormat(dictionary_columns={'a'}) - pff3 = ParquetFileFormat(coerce_int96_timestamp_unit="s") - pff4 = ParquetFileFormat(binary_type=pa.binary_view()) - pff5 = ParquetFileFormat(list_type=pa.LargeListType) - - assert pff1.read_options == ParquetReadOptions() - assert pff2.read_options == ParquetReadOptions(dictionary_columns=['a']) - assert pff3.read_options == ParquetReadOptions( + pff1 = ds.ParquetFileFormat() + pff2 = ds.ParquetFileFormat(dictionary_columns={'a'}) + pff3 = ds.ParquetFileFormat(coerce_int96_timestamp_unit="s") + pff4 = ds.ParquetFileFormat(binary_type=pa.binary_view()) + pff5 = ds.ParquetFileFormat(list_type=pa.LargeListType) + + assert pff1.read_options == ds.ParquetReadOptions() + assert pff2.read_options == ds.ParquetReadOptions(dictionary_columns=['a']) + assert pff3.read_options == ds.ParquetReadOptions( coerce_int96_timestamp_unit="s") - assert pff4.read_options == ParquetReadOptions( + assert pff4.read_options == ds.ParquetReadOptions( binary_type=pa.binary_view()) - assert pff5.read_options == ParquetReadOptions( + assert pff5.read_options == ds.ParquetReadOptions( list_type=pa.LargeListType) @pytest.mark.parquet def test_parquet_scan_options(): - opts1 = ParquetFragmentScanOptions() - opts2 = ParquetFragmentScanOptions(buffer_size=4096) - opts3 = ParquetFragmentScanOptions( + opts1 = ds.ParquetFragmentScanOptions() + opts2 = ds.ParquetFragmentScanOptions(buffer_size=4096) + opts3 = ds.ParquetFragmentScanOptions( buffer_size=2**13, use_buffered_stream=True) - opts4 = ParquetFragmentScanOptions(buffer_size=2**13, pre_buffer=False) - opts5 = ParquetFragmentScanOptions( + opts4 = ds.ParquetFragmentScanOptions(buffer_size=2**13, pre_buffer=False) + opts5 = ds.ParquetFragmentScanOptions( thrift_string_size_limit=123456, thrift_container_size_limit=987654,) - opts6 = ParquetFragmentScanOptions( + opts6 = ds.ParquetFragmentScanOptions( page_checksum_verification=True) cache_opts = pa.CacheOptions( hole_size_limit=2**10, range_size_limit=8*2**10, lazy=True) - opts7 = ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts) + opts7 = ds.ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts) assert opts1.use_buffered_stream is False assert opts1.buffer_size == 2**13 @@ -1089,16 +1076,16 @@ def test_file_format_pickling(pickle_module): use_threads=False, block_size=14)), ] try: - formats.append(OrcFileFormat()) + formats.append(ds.OrcFileFormat()) except ImportError: pass if pq is not None: formats.extend([ - ParquetFileFormat(), - ParquetFileFormat(dictionary_columns={'a'}), - ParquetFileFormat(use_buffered_stream=True), - ParquetFileFormat( + ds.ParquetFileFormat(), + ds.ParquetFileFormat(dictionary_columns={'a'}), + ds.ParquetFileFormat(use_buffered_stream=True), + ds.ParquetFileFormat( use_buffered_stream=True, buffer_size=4096, thrift_string_size_limit=123, @@ -1127,8 +1114,8 @@ def test_fragment_scan_options_pickling(pickle_module): if pq is not None: options.extend([ - ParquetFragmentScanOptions(buffer_size=4096), - ParquetFragmentScanOptions(pre_buffer=True), + ds.ParquetFragmentScanOptions(buffer_size=4096), + ds.ParquetFragmentScanOptions(pre_buffer=True), ]) for option in options: @@ -1145,8 +1132,8 @@ def test_fragment_scan_options_pickling(pickle_module): @pytest.mark.parametrize('pre_buffer', [False, True]) @pytest.mark.parquet def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer): - format = ParquetFileFormat( - read_options=ParquetReadOptions(dictionary_columns={"str"}), + format = 
ds.ParquetFileFormat( + read_options=ds.ParquetReadOptions(dictionary_columns={"str"}), pre_buffer=pre_buffer ) @@ -1218,7 +1205,7 @@ def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer): @pytest.mark.parquet def test_make_fragment(multisourcefs): - parquet_format = ParquetFileFormat() + parquet_format = ds.ParquetFileFormat() dataset = ds.dataset('/plain', filesystem=multisourcefs, format=parquet_format) @@ -1229,7 +1216,7 @@ def test_make_fragment(multisourcefs): row_group_fragment = parquet_format.make_fragment(path, multisourcefs, row_groups=[0]) for f in [fragment, row_group_fragment]: - assert isinstance(f, ParquetFileFragment) + assert isinstance(f, ds.ParquetFileFragment) assert f.path == path assert isinstance(f.filesystem, type(multisourcefs)) assert row_group_fragment.row_groups == [0] @@ -1245,7 +1232,7 @@ def test_make_fragment_with_size(s3_example_simple): """ table, path, fs, uri, host, port, access_key, secret_key = s3_example_simple - file_format = ParquetFileFormat() + file_format = ds.ParquetFileFormat() paths = [path] fragments = [file_format.make_fragment(path, fs) @@ -1276,7 +1263,7 @@ def test_make_fragment_with_size(s3_example_simple): fragments_with_size, format=file_format, schema=table.schema, filesystem=fs ) - with pytest.raises(lib.ArrowInvalid, match='Parquet file size is 1 bytes'): + with pytest.raises(pyarrow.lib.ArrowInvalid, match='Parquet file size is 1 bytes'): table = dataset_with_size.to_table() # too large sizes -> error @@ -1352,8 +1339,8 @@ def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module): arrays[1], arrays[2].dictionary_encode() ] - dictionary_format = ParquetFileFormat( - read_options=ParquetReadOptions( + dictionary_format = ds.ParquetFileFormat( + read_options=ds.ParquetReadOptions( dictionary_columns=['alpha', 'animal'] ), use_buffered_stream=True, @@ -1361,7 +1348,7 @@ def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module): ) cases = [ - (arrays, ParquetFileFormat()), + (arrays, ds.ParquetFileFormat()), (dictionary_arrays, dictionary_format) ] for arrays, format_ in cases: @@ -1965,7 +1952,7 @@ def test_fragments_repr(tempdir, dataset): "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))]) def test_partitioning_factory(mockfs, pickled, pickle_module): paths_or_selector = fs.FileSelector('subdir', recursive=True) - format = ParquetFileFormat() + format = ds.ParquetFileFormat() options = ds.FileSystemFactoryOptions('subdir') partitioning_factory = ds.DirectoryPartitioning.discover(['group', 'key']) @@ -2000,7 +1987,7 @@ def test_partitioning_factory(mockfs, pickled, pickle_module): def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled, pickle_module): paths_or_selector = fs.FileSelector('subdir', recursive=True) - format = ParquetFileFormat() + format = ds.ParquetFileFormat() options = ds.FileSystemFactoryOptions('subdir') partitioning_factory = ds.DirectoryPartitioning.discover( @@ -2229,8 +2216,7 @@ def test_dictionary_partitioning_outer_nulls_raises(tempdir): def test_positional_keywords_raises(tempdir): table = pa.table({'a': ['x', 'y', None], 'b': ['x', 'y', 'z']}) with pytest.raises(TypeError): - ds.write_dataset(table, tempdir, "basename-{i}.arrow") \ - # type: ignore[too-many-positional-arguments] + ds.write_dataset(table, tempdir, "basename-{i}.arrow") @pytest.mark.parquet @@ -2608,12 +2594,12 @@ def test_construct_from_invalid_sources_raise(multisourcefs): child1 = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/plain'), - 
format=ParquetFileFormat() + format=ds.ParquetFileFormat() ) child2 = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/schema'), - format=ParquetFileFormat() + format=ds.ParquetFileFormat() ) batch1 = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"]) batch2 = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["b"]) @@ -3085,7 +3071,7 @@ def test_file_format_inspect_fsspec(tempdir): assert fsspec_fs.ls(tempdir)[0].endswith("data.parquet") # inspect using dataset file format - format = ParquetFileFormat() + format = ds.ParquetFileFormat() # manually creating a PyFileSystem instead of using fs._ensure_filesystem # which would convert an fsspec local filesystem to a native one filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs)) @@ -3158,13 +3144,13 @@ def test_filter_compute_expression(tempdir, dataset_reader): _, path = _create_single_file(tempdir, table) dataset = ds.dataset(str(path)) - filter_ = is_in(ds.field('A'), pa.array(["a", "b"])) + filter_ = pc.is_in(ds.field('A'), pa.array(["a", "b"])) assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 3 - filter_ = hour(ds.field('B')) >= 3 + filter_ = pc.hour(ds.field('B')) >= 3 assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 2 - days = days_between(ds.field('B'), ds.field("C")) + days = pc.days_between(ds.field('B'), ds.field("C")) result = dataset_reader.to_table(dataset, columns={"days": days}) assert result["days"].to_pylist() == [0, 1, 2, 3, 4] @@ -3172,7 +3158,7 @@ def test_filter_compute_expression(tempdir, dataset_reader): def test_dataset_union(multisourcefs): child = ds.FileSystemDatasetFactory( multisourcefs, fs.FileSelector('/plain'), - format=ParquetFileFormat() + format=ds.ParquetFileFormat() ) factory = ds.UnionDatasetFactory([child]) @@ -3395,7 +3381,7 @@ def test_orc_format(tempdir, dataset_reader): path = str(tempdir / 'test.orc') orc.write_table(table, path) - dataset = ds.dataset(path, format=OrcFileFormat()) + dataset = ds.dataset(path, format=ds.OrcFileFormat()) fragments = list(dataset.get_fragments()) assert isinstance(fragments[0], ds.FileFragment) result = dataset_reader.to_table(dataset) @@ -3450,7 +3436,7 @@ def test_orc_scan_options(tempdir, dataset_reader): def test_orc_format_not_supported(): try: - from pyarrow.dataset import OrcFileFormat # type: ignore[possibly-unbound-import] # noqa + from pyarrow.dataset import OrcFileFormat # noqa except ImportError: # ORC is not available, test error message with pytest.raises( @@ -3469,7 +3455,7 @@ def test_orc_writer_not_implemented_for_dataset(): pa.table({"a": range(10)}), format='orc', base_dir='/tmp' ) - of = OrcFileFormat() + of = ds.OrcFileFormat() with pytest.raises( NotImplementedError, match="Writing datasets not yet implemented for this file format" @@ -3687,7 +3673,7 @@ def test_column_names_encoding(tempdir, dataset_reader): # Reading as string without specifying encoding should produce an error dataset = ds.dataset(path, format='csv', schema=expected_schema) - with pytest.raises(lib.ArrowInvalid, match="invalid UTF8"): + with pytest.raises(pyarrow.lib.ArrowInvalid, match="invalid UTF8"): dataset_reader.to_table(dataset) # Setting the encoding in the read_options should transcode the data @@ -4189,7 +4175,7 @@ def test_write_to_dataset_given_null_just_works(tempdir): def _sort_table(tab, sort_col): import pyarrow.compute as pc - sorted_indices = sort_indices( + sorted_indices = pc.sort_indices( tab, options=pc.SortOptions([(sort_col, 'ascending')])) return pc.take(tab, sorted_indices) @@ 
-4637,7 +4623,7 @@ def test_write_dataset_max_open_files(tempdir): def _get_compare_pair(data_source, record_batch, file_format, col_id): num_of_files_generated = _get_num_of_files_generated( base_directory=data_source, file_format=file_format) - number_of_partitions = len(unique(record_batch[col_id])) + number_of_partitions = len(pa.compute.unique(record_batch[col_id])) return num_of_files_generated, number_of_partitions # CASE 1: when max_open_files=default & max_open_files >= num_of_partitions @@ -4935,7 +4921,7 @@ def test_write_dataset_parquet(tempdir): # using custom options for version in ["1.0", "2.4", "2.6"]: - format = ParquetFileFormat() + format = ds.ParquetFileFormat() opts = format.make_write_options(version=version) assert " should error is dataset was properly encrypted - pformat = ParquetFileFormat() + pformat = pa.dataset.ParquetFileFormat() with pytest.raises(IOError, match=r"no decryption"): ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) # set decryption config for parquet fragment scan options - pq_scan_opts = ParquetFragmentScanOptions( + pq_scan_opts = ds.ParquetFragmentScanOptions( decryption_config=parquet_decryption_cfg ) - pformat = ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) assert table.equals(dataset.to_table()) @@ -153,11 +145,11 @@ def test_dataset_encryption_decryption(): # set decryption properties for parquet fragment scan options decryption_properties = crypto_factory.file_decryption_properties( kms_connection_config, decryption_config) - pq_scan_opts = ParquetFragmentScanOptions( + pq_scan_opts = ds.ParquetFragmentScanOptions( decryption_properties=decryption_properties ) - pformat = ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) assert table.equals(dataset.to_table()) @@ -172,7 +164,7 @@ def test_write_dataset_parquet_without_encryption(): # Set the encryption configuration using ParquetFileFormat # and make_write_options - pformat = ParquetFileFormat() + pformat = pa.dataset.ParquetFileFormat() with pytest.raises(NotImplementedError): _ = pformat.make_write_options(encryption_config="some value") @@ -210,14 +202,14 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes: plaintext_footer=False, data_key_length_bits=128, ) - pqe_config = ParquetEncryptionConfig( + pqe_config = ds.ParquetEncryptionConfig( crypto_factory, kms_config, encryption_config ) - pqd_config = ParquetDecryptionConfig( + pqd_config = ds.ParquetDecryptionConfig( crypto_factory, kms_config, pe.DecryptionConfiguration() ) - scan_options = ParquetFragmentScanOptions(decryption_config=pqd_config) - file_format = ParquetFileFormat(default_fragment_scan_options=scan_options) + scan_options = ds.ParquetFragmentScanOptions(decryption_config=pqd_config) + file_format = ds.ParquetFileFormat(default_fragment_scan_options=scan_options) write_options = file_format.make_write_options(encryption_config=pqe_config) file_decryption_properties = crypto_factory.file_decryption_properties(kms_config) diff --git a/python/pyarrow/tests/test_exec_plan.py b/python/pyarrow/tests/test_exec_plan.py index 177f3baa378..d85a2c21524 100644 --- a/python/pyarrow/tests/test_exec_plan.py +++ b/python/pyarrow/tests/test_exec_plan.py @@ -220,14 +220,13 
@@ def test_table_join_keys_order(): def test_filter_table_errors(): - from pyarrow.compute import divide # type: ignore[unresolved-attribute] t = pa.table({ "a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50] }) with pytest.raises(pa.ArrowTypeError): - _filter_table(t, divide(pc.field("a"), pc.scalar(2))) + _filter_table(t, pc.divide(pc.field("a"), pc.scalar(2))) with pytest.raises(pa.ArrowInvalid): _filter_table(t, (pc.field("Z") <= pc.scalar(2))) @@ -268,16 +267,14 @@ def test_filter_table_ordering(): def test_complex_filter_table(): - from pyarrow.compute import bit_wise_and, multiply \ - # type: ignore[unresolved-attribute] t = pa.table({ "a": [1, 2, 3, 4, 5, 6, 6], "b": [10, 20, 30, 40, 50, 60, 61] }) result = _filter_table( - t, ((bit_wise_and(pc.field("a"), pc.scalar(1)) == pc.scalar(0)) & - (multiply(pc.field("a"), pc.scalar(10)) == pc.field("b"))) + t, ((pc.bit_wise_and(pc.field("a"), pc.scalar(1)) == pc.scalar(0)) & + (pc.multiply(pc.field("a"), pc.scalar(10)) == pc.field("b"))) ) assert result == pa.table({ diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index c1e5db238ad..ebac37e862b 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -27,7 +27,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa from pyarrow.vendored.version import Version @@ -1353,11 +1353,11 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') - uuid_type = mod._make_uuid_type() # type: ignore[unresolved-attribute] + uuid_type = mod._make_uuid_type() assert uuid_type.extension_name == "example-uuid" assert uuid_type.storage_type == pa.binary(16) - array = mod._make_uuid_array() # type: ignore[unresolved-attribute] + array = mod._make_uuid_array() assert array.type == uuid_type assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' @@ -1882,7 +1882,7 @@ def test_bool8_from_numpy_conversion(): ValueError, match="Cannot convert 0-D array to bool8 array", ): - pa.Bool8Array.from_numpy(np.bool_()) # type: ignore[no-matching-overload] + pa.Bool8Array.from_numpy(np.bool_()) # must use compatible storage type with pytest.raises( diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index c4631903c1a..054bf920b26 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -26,7 +26,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa import pyarrow.tests.strategies as past @@ -63,7 +63,7 @@ def compression(request): yield request.param -TEST_FILES = [] +TEST_FILES = None def setup_module(module): @@ -72,12 +72,11 @@ def setup_module(module): def teardown_module(module): - if TEST_FILES is not None: - for path in TEST_FILES: - try: - os.remove(path) - except os.error: - pass + for path in TEST_FILES: + try: + os.remove(path) + except os.error: + pass @pytest.mark.pandas @@ -591,7 +590,7 @@ def test_sparse_dataframe(version): # GH #221 data = {'A': [0, 1, 2], 'B': [1, 0, 1]} - df = pd.DataFrame(data).to_sparse(fill_value=1) # type: ignore[call-non-callable] + df = pd.DataFrame(data).to_sparse(fill_value=1) expected = df.to_dense() _check_pandas_roundtrip(df, expected, version=version) diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index f14e5215b33..e9e99d8eb83 100644 --- a/python/pyarrow/tests/test_flight.py +++ 
b/python/pyarrow/tests/test_flight.py @@ -26,18 +26,17 @@ import threading import time import traceback -from json import dumps as json_dumps -from json import dumps as json_loads +import json from datetime import datetime try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa -from pyarrow.lib import IpcReadOptions, tobytes # type: ignore[unresolved_import] +from pyarrow.lib import IpcReadOptions, tobytes from pyarrow.util import find_free_port from pyarrow.tests import util @@ -50,35 +49,8 @@ ClientMiddleware, ClientMiddlewareFactory, ) except ImportError: - class MockContextManager: - def __init__(self, *args, **kwargs): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - class FlightServerBase(MockContextManager): - def serve(self): - pass - - class FlightClient(MockContextManager): - def get_flight_info(self, *args, **kwargs): - pass - - def do_action(self, *args, **kwargs): - pass - - def do_get(self, *args, **kwargs): - pass - - def do_put(self, *args, **kwargs): - pass - - def close(self): - pass + flight = None + FlightClient, FlightServerBase = object, object ServerAuthHandler, ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object ClientMiddleware, ClientMiddlewareFactory = object, object @@ -344,7 +316,7 @@ class InvalidStreamFlightServer(FlightServerBase): def do_get(self, context, ticket): data1 = [pa.array([-10, -5, 0, 5, 10], type=pa.int32())] data2 = [pa.array([-10.0, -5.0, 0.0, 5.0, 10.0], type=pa.float64())] - assert data1[0].type != data2[0].type + assert data1.type != data2.type table1 = pa.Table.from_arrays(data1, names=['a']) table2 = pa.Table.from_arrays(data2, names=['a']) assert table1.schema == self.schema @@ -1121,7 +1093,7 @@ def test_client_wait_for_available(): server = None def serve(): - nonlocal server + global server time.sleep(0.5) server = FlightServerBase(location) server.serve() @@ -1771,7 +1743,7 @@ def test_flight_do_put_limit(): with pytest.raises(flight.FlightWriteSizeExceededError, match="exceeded soft limit") as excinfo: writer.write_batch(large_batch) - assert excinfo.value.limit == 4096 # type: ignore[unresolved-attribute] + assert excinfo.value.limit == 4096 smaller_batches = [ large_batch.slice(0, 384), large_batch.slice(384), @@ -2385,7 +2357,7 @@ class ActionNoneFlightServer(EchoFlightServer): def do_action(self, context, action): if action.type == "get_value": - return [json_dumps(self.VALUES).encode('utf-8')] + return [json.dumps(self.VALUES).encode('utf-8')] elif action.type == "append": self.VALUES.append(True) return None @@ -2402,7 +2374,7 @@ def test_none_action_side_effect(): FlightClient(('localhost', server.port)) as client: client.do_action(flight.Action("append", b"")) r = client.do_action(flight.Action("get_value", b"")) - assert json_loads(next(r).body.to_pybytes()) == [True] + assert json.loads(next(r).body.to_pybytes()) == [True] @pytest.mark.slow # Takes a while for gRPC to "realize" writes fail diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 7c891c7919d..a5a10fa55c6 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -39,31 +39,6 @@ copy_files) from pyarrow.util import find_free_port -try: - from pyarrow.fs import ( - AwsDefaultS3RetryStrategy, # type: ignore[possibly-unbound-import] - AwsStandardS3RetryStrategy, # type: ignore[possibly-unbound-import] - S3FileSystem, # type: ignore[possibly-unbound-import] - 
resolve_s3_region, # type: ignore[possibly-unbound-import] - S3RetryStrategy # type: ignore[possibly-unbound-import] - ) -except ImportError: - pass - -try: - from pyarrow.fs import AzureFileSystem # type: ignore[possibly-unbound-import] -except ImportError: - pass - -try: - from pyarrow.fs import GcsFileSystem # type: ignore[possibly-unbound-import] -except ImportError: - pass - -try: - from pyarrow.fs import HadoopFileSystem # type: ignore[possibly-unbound-import] -except ImportError: - pass here = os.path.dirname(os.path.abspath(__file__)) @@ -236,6 +211,7 @@ def subtree_localfs(request, tempdir, localfs): @pytest.fixture def gcsfs(request, gcs_server): request.config.pyarrow.requires('gcs') + from pyarrow.fs import GcsFileSystem host, port = gcs_server['connection'] bucket = 'pyarrow-filesystem/' @@ -265,6 +241,7 @@ def gcsfs(request, gcs_server): @pytest.fixture def s3fs(request, s3_server): request.config.pyarrow.requires('s3') + from pyarrow.fs import S3FileSystem host, port, access_key, secret_key = s3_server['connection'] bucket = 'pyarrow-filesystem/' @@ -324,6 +301,7 @@ def subtree_s3fs(request, s3fs): @pytest.fixture def azurefs(request, azure_server): request.config.pyarrow.requires('azure') + from pyarrow.fs import AzureFileSystem host, port, account_name, account_key = azure_server['connection'] azurite_authority = f"{host}:{port}" @@ -355,6 +333,8 @@ def hdfs(request, hdfs_connection): if not pa.have_libhdfs(): pytest.skip('Cannot locate libhdfs') + from pyarrow.fs import HadoopFileSystem + host, port, user = hdfs_connection fs = HadoopFileSystem(host, port=port, user=user) @@ -535,6 +515,7 @@ def skip_azure(fs, reason): @pytest.mark.s3 def test_s3fs_limited_permissions_create_bucket(s3_server): + from pyarrow.fs import S3FileSystem _configure_s3_limited_user(s3_server, _minio_limited_policy, 'test_fs_limited_user', 'limited123') host, port, _, _ = s3_server['connection'] @@ -1166,6 +1147,7 @@ def test_mockfs_mtime_roundtrip(mockfs): @pytest.mark.gcs def test_gcs_options(pickle_module): + from pyarrow.fs import GcsFileSystem dt = datetime.now() fs = GcsFileSystem(access_token='abc', target_service_account='service_account@apache', @@ -1203,6 +1185,10 @@ def test_gcs_options(pickle_module): @pytest.mark.s3 def test_s3_options(pickle_module): + from pyarrow.fs import (AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, S3FileSystem, + S3RetryStrategy) + fs = S3FileSystem(access_key='access', secret_key='secret', session_token='token', region='us-east-2', scheme='https', endpoint_override='localhost:8999') @@ -1303,6 +1289,8 @@ def test_s3_options(pickle_module): @pytest.mark.s3 def test_s3_proxy_options(monkeypatch, pickle_module): + from pyarrow.fs import S3FileSystem + # The following two are equivalent: proxy_opts_1_dict = {'scheme': 'http', 'host': 'localhost', 'port': 8999} proxy_opts_1_str = 'http://localhost:8999' @@ -1442,6 +1430,8 @@ def test_s3_proxy_options(monkeypatch, pickle_module): @pytest.mark.s3 def test_s3fs_wrong_region(): + from pyarrow.fs import S3FileSystem + # wrong region for bucket # anonymous=True incase CI/etc has invalid credentials fs = S3FileSystem(region='eu-north-1', anonymous=True) @@ -1464,6 +1454,8 @@ def test_s3fs_wrong_region(): @pytest.mark.azure def test_azurefs_options(pickle_module): + from pyarrow.fs import AzureFileSystem + fs1 = AzureFileSystem(account_name='fake-account-name') assert isinstance(fs1, AzureFileSystem) assert pickle_module.loads(pickle_module.dumps(fs1)) == fs1 @@ -1556,6 +1548,7 @@ def 
test_azurefs_options(pickle_module): @pytest.mark.hdfs def test_hdfs_options(hdfs_connection, pickle_module): + from pyarrow.fs import HadoopFileSystem if not pa.have_libhdfs(): pytest.skip('Cannot locate libhdfs') @@ -1662,6 +1655,8 @@ def test_filesystem_from_path_object(path): @pytest.mark.s3 def test_filesystem_from_uri_s3(s3_server): + from pyarrow.fs import S3FileSystem + host, port, access_key, secret_key = s3_server['connection'] uri = f"s3://{access_key}:{secret_key}@mybucket/foo/bar?scheme=http&" \ @@ -1679,6 +1674,8 @@ def test_filesystem_from_uri_s3(s3_server): @pytest.mark.gcs def test_filesystem_from_uri_gcs(gcs_server): + from pyarrow.fs import GcsFileSystem + host, port = gcs_server['connection'] uri = ("gs://anonymous@" + @@ -1867,6 +1864,7 @@ def test_py_open_append_stream(): def test_s3_real_aws(): # Exercise connection code with an AWS-backed S3 bucket. # This is a minimal integration check for ARROW-9261 and similar issues. + from pyarrow.fs import S3FileSystem default_region = (os.environ.get('PYARROW_TEST_S3_REGION') or 'us-east-1') fs = S3FileSystem(anonymous=True) @@ -1922,6 +1920,7 @@ def test_s3_real_aws_region_selection(): @pytest.mark.s3 def test_resolve_s3_region(): + from pyarrow.fs import resolve_s3_region assert resolve_s3_region('voltrondata-labs-datasets') == 'us-east-2' assert resolve_s3_region('mf-nwp-models') == 'eu-west-1' @@ -2169,7 +2168,7 @@ def test_fsspec_filesystem_from_uri(): def test_huggingface_filesystem_from_uri(): pytest.importorskip("fsspec") try: - from huggingface_hub import HfFileSystem # type: ignore[unresolved_import] + from huggingface_hub import HfFileSystem except ImportError: pytest.skip("huggingface_hub not installed") diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py index 01a6d2151a0..80d119a4853 100644 --- a/python/pyarrow/tests/test_gandiva.py +++ b/python/pyarrow/tests/test_gandiva.py @@ -23,7 +23,7 @@ @pytest.mark.gandiva def test_tree_exp_builder(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva builder = gandiva.TreeExprBuilder() @@ -65,7 +65,7 @@ def test_tree_exp_builder(): @pytest.mark.gandiva def test_table(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva table = pa.Table.from_arrays([pa.array([1.0, 2.0]), pa.array([3.0, 4.0])], ['a', 'b']) @@ -92,7 +92,7 @@ def test_table(): @pytest.mark.gandiva def test_filter(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva table = pa.Table.from_arrays([pa.array([1.0 * i for i in range(10000)])], ['a']) @@ -116,7 +116,7 @@ def test_filter(): @pytest.mark.gandiva def test_in_expr(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva arr = pa.array(["ga", "an", "nd", "di", "iv", "va"]) table = pa.Table.from_arrays([arr], ["a"]) @@ -154,7 +154,7 @@ def test_in_expr(): @pytest.mark.skip(reason="Gandiva C++ did not have *real* binary, " "time and date support.") def test_in_expr_todo(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva # TODO: Implement reasonable support for timestamp, time & date. 
# Current exceptions: # pyarrow.lib.ArrowException: ExpressionValidationError: @@ -227,7 +227,7 @@ def test_in_expr_todo(): @pytest.mark.gandiva def test_boolean(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva table = pa.Table.from_arrays([ pa.array([1., 31., 46., 3., 57., 44., 22.]), @@ -254,7 +254,7 @@ def test_boolean(): @pytest.mark.gandiva def test_literals(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva builder = gandiva.TreeExprBuilder() @@ -294,7 +294,7 @@ def test_literals(): @pytest.mark.gandiva def test_regex(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva elements = ["park", "sparkle", "bright spark and fire", "spark"] data = pa.array(elements, type=pa.string()) @@ -318,7 +318,7 @@ def test_regex(): @pytest.mark.gandiva def test_get_registered_function_signatures(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva signatures = gandiva.get_registered_function_signatures() assert type(signatures[0].return_type()) is pa.DataType @@ -328,7 +328,7 @@ def test_get_registered_function_signatures(): @pytest.mark.gandiva def test_filter_project(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva mpool = pa.default_memory_pool() # Create a table with some sample data array0 = pa.array([10, 12, -20, 5, 21, 29], pa.int32()) @@ -375,7 +375,7 @@ def test_filter_project(): @pytest.mark.gandiva def test_to_string(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva builder = gandiva.TreeExprBuilder() assert str(builder.make_literal(2.0, pa.float64()) @@ -395,7 +395,7 @@ def test_to_string(): @pytest.mark.gandiva def test_rejects_none(): - import pyarrow.gandiva as gandiva # type: ignore[unresolved_import] + import pyarrow.gandiva as gandiva builder = gandiva.TreeExprBuilder() diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 4b1641557e7..912953ae60d 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -158,10 +158,10 @@ def select_frame(self, func_name): m = re.search(pat, out) if m is None: pytest.fail(f"Could not select frame for function {func_name}") - else: - frame_num = int(m[1]) - out = self.run_command(f"frame {frame_num}") - assert f"in {func_name}" in out + + frame_num = int(m[1]) + out = self.run_command(f"frame {frame_num}") + assert f"in {func_name}" in out def join(self): if self.proc is not None: diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index bea9a929673..a6d3546e57c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -33,12 +33,7 @@ try: import numpy as np except ImportError: - pass - -try: - from pyarrow import lib # type: ignore[unresolved-attribute] -except ImportError: - pass + np = None from pyarrow.util import guid from pyarrow import Codec @@ -817,7 +812,7 @@ def test_cache_options_pickling(pickle_module): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ pytest.param( - "bz2", marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) + "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) ), "brotli", "gzip", @@ -858,7 +853,7 @@ def test_compress_decompress(compression): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ pytest.param( - 
"bz2", marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) + "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) ), "brotli", "gzip", @@ -1730,7 +1725,7 @@ def test_output_stream_constructor(tmpdir): ]) def test_compression_detection(path, expected_compression): if not Codec.is_available(expected_compression): - with pytest.raises(lib.ArrowNotImplementedError): + with pytest.raises(pa.lib.ArrowNotImplementedError): Codec.detect(path) else: codec = Codec.detect(path) @@ -1755,7 +1750,7 @@ def test_unknown_compression_raises(): "zstd", pytest.param( "snappy", - marks=pytest.mark.xfail(raises=lib.ArrowNotImplementedError) + marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) ) ]) def test_compressed_roundtrip(compression): diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 77018f93a24..b3b3367223d 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -28,15 +28,11 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa from pyarrow.tests.util import changed_environ, invoke_script -try: - from pyarrow import lib # type: ignore[unresolved-attribute] -except ImportError: - pass try: from pandas.testing import assert_frame_equal @@ -1238,7 +1234,7 @@ def __arrow_c_stream__(self, requested_schema=None): assert reader.read_all() == expected.cast(good_schema) # If schema doesn't match, raises TypeError - with pytest.raises(lib.ArrowTypeError, match='Field 0 cannot be cast'): + with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'): pa.RecordBatchReader.from_stream( wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))]) ) @@ -1275,7 +1271,7 @@ def test_record_batch_reader_cast(): # Check error for impossible cast in call to .cast() reader = pa.RecordBatchReader.from_batches(schema_src, data) - with pytest.raises(lib.ArrowTypeError, match='Field 0 cannot be cast'): + with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'): reader.cast(pa.schema([pa.field('a', pa.list_(pa.int32()))])) # Cast to same type should always work (also for types without a T->T cast function) @@ -1313,7 +1309,7 @@ def test_record_batch_reader_cast_nulls(): # when the batch is pulled reader = pa.RecordBatchReader.from_batches(schema_src, data_with_nulls) casted_reader = reader.cast(schema_dst) - with pytest.raises(lib.ArrowInvalid, match="Can't cast array"): + with pytest.raises(pa.lib.ArrowInvalid, match="Can't cast array"): casted_reader.read_all() diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index 90ce549c6e6..c3f9fe333bd 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -20,14 +20,14 @@ from decimal import Decimal import io import itertools -from json import dumps as json_dumps +import json import string import unittest try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa @@ -49,7 +49,7 @@ def make_random_json(num_cols=2, num_rows=10, linesep='\r\n'): lines = [] for row in arr.T: json_obj = OrderedDict([(k, int(v)) for (k, v) in zip(col_names, row)]) - lines.append(json_dumps(json_obj)) + lines.append(json.dumps(json_obj)) data = linesep.join(lines).encode() columns = [pa.array(col, type=pa.int64()) for col in arr] expected = pa.Table.from_arrays(columns, col_names) diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index 51f259e4bd5..d2ba780efc7 100644 --- 
a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -15,8 +15,7 @@ # specific language governing permissions and limitations # under the License. -from json import dumps as json_dumps -from json import loads as json_loads +import json import os import pyarrow as pa import pyarrow.jvm as pa_jvm @@ -43,7 +42,7 @@ def root_allocator(): 'POM:version', namespaces={ 'POM': 'http://maven.apache.org/POM/4.0.0' - }).text # type: ignore[possibly-unbound-attribute] + }).text jar_path = os.path.join( arrow_dir, 'java', 'tools', 'target', f'arrow-tools-{version}-jar-with-dependencies.jar') @@ -77,8 +76,8 @@ def test_jvm_buffer(root_allocator): def test_jvm_buffer_released(root_allocator): - import jpype.imports # type: ignore[unresolved_import] # noqa - from java.lang import IllegalArgumentException # type: ignore[unresolved_import] + import jpype.imports # noqa + from java.lang import IllegalArgumentException jvm_buffer = root_allocator.buffer(8) jvm_buffer.release() @@ -172,27 +171,27 @@ def test_jvm_types(root_allocator, pa_type, jvm_spec, nullable): spec = { 'name': 'field_name', 'nullable': nullable, - 'type': json_loads(jvm_spec), + 'type': json.loads(jvm_spec), # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json_dumps(spec)) + jvm_field = _jvm_field(json.dumps(spec)) result = pa_jvm.field(jvm_field) expected_field = pa.field('field_name', pa_type, nullable=nullable) assert result == expected_field - jvm_schema = _jvm_schema(json_dumps(spec)) + jvm_schema = _jvm_schema(json.dumps(spec)) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field]) # Schema with custom metadata - jvm_schema = _jvm_schema(json_dumps(spec), {'meta': 'data'}) + jvm_schema = _jvm_schema(json.dumps(spec), {'meta': 'data'}) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field], {'meta': 'data'}) # Schema with custom field metadata spec['metadata'] = [{'key': 'field meta', 'value': 'field data'}] - jvm_schema = _jvm_schema(json_dumps(spec)) + jvm_schema = _jvm_schema(json.dumps(spec)) result = pa_jvm.schema(jvm_schema) expected_field = expected_field.with_metadata( {'field meta': 'field data'}) @@ -376,11 +375,11 @@ def test_jvm_record_batch(root_allocator, pa_type, py_data, jvm_type, spec = { 'name': 'field_name', 'nullable': False, - 'type': json_loads(jvm_spec), + 'type': json.loads(jvm_spec), # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json_dumps(spec)) + jvm_field = _jvm_field(json.dumps(spec)) # Create VectorSchemaRoot jvm_fields = jpype.JClass('java.util.ArrayList')() diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 09ac52588ed..64f45d8bed8 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -22,7 +22,7 @@ import pytest import pyarrow as pa -from pyarrow.lib import ArrowInvalid # type: ignore[unresolved_import] +from pyarrow.lib import ArrowInvalid def test_get_include(): diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 9f15bc73c5b..ceea2527da0 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -17,7 +17,7 @@ import gc import decimal -from json import dumps as json_dumps +import json import multiprocessing as mp import sys import warnings @@ -32,14 +32,13 @@ import numpy as np import numpy.testing as npt try: - _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning \ - # type: 
ignore[unresolved-attribute] + _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning except AttributeError: from numpy.exceptions import ( VisibleDeprecationWarning as _np_VisibleDeprecationWarning ) except ImportError: - pass + np = None from pyarrow.pandas_compat import get_logical_type, _pandas_api from pyarrow.tests.util import invoke_script, random_ascii, rands @@ -48,7 +47,6 @@ from pyarrow.vendored.version import Version import pyarrow as pa -from pyarrow import lib # type: ignore[unresolved-attribute] try: from pyarrow import parquet as pq except ImportError: @@ -629,13 +627,11 @@ def test_table_column_subset_metadata(self): expected = df[['a']] if isinstance(df.index, pd.DatetimeIndex): df.index.freq = None - tm.assert_frame_equal(result, expected) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, expected) table_subset2 = table_subset.remove_column(1) result = table_subset2.to_pandas() - tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) \ - # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, df[['a']].reset_index(drop=True)) def test_to_pandas_column_subset_multiindex(self): # ARROW-10122 @@ -1941,7 +1937,7 @@ def test_array_of_bytes_to_strings(self): # cannot be converted to utf-8 def test_array_of_bytes_to_strings_bad_data(self): with pytest.raises( - lib.ArrowInvalid, + pa.lib.ArrowInvalid, match="was not a utf8 string"): pa.array(np.array([b'\x80\x81'], dtype=object), pa.string()) @@ -1957,13 +1953,13 @@ def test_numpy_string_array_to_fixed_size_binary(self): expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3)) assert converted.equals(expected) - with pytest.raises(lib.ArrowInvalid, + with pytest.raises(pa.lib.ArrowInvalid, match=r'Got bytestring of length 3 \(expected 4\)'): arr = np.array([b'foo', b'bar', b'baz'], dtype='|S3') pa.array(arr, type=pa.binary(4)) with pytest.raises( - lib.ArrowInvalid, + pa.lib.ArrowInvalid, match=r'Got bytestring of length 12 \(expected 3\)'): arr = np.array([b'foo', b'bar', b'baz'], dtype='|U3') pa.array(arr, type=pa.binary(3)) @@ -3268,8 +3264,7 @@ def test_error_sparse(self): df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])}) except AttributeError: # pandas.arrays module introduced in pandas 0.24 - from pandas import SparseArray # type: ignore[unresolved-import] - df = pd.DataFrame({'a': SparseArray([1, np.nan, 3])}) + df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])}) with pytest.raises(TypeError, match="Sparse pandas data"): pa.Table.from_pandas(df) @@ -3724,9 +3719,7 @@ def test_table_from_pandas_schema_field_order_metadata(): coerce_cols_to_types["datetime"] = "datetime64[s, UTC]" expected = df[["float", "datetime"]].astype(coerce_cols_to_types) - # TODO: result and expected should have the same type, - # see other ignore[invalid-argument-type] involving assert_frame_equal - tm.assert_frame_equal(result, expected) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, expected) # ---------------------------------------------------------------------- @@ -4429,13 +4422,11 @@ def test_convert_to_extension_array(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method if Version(pd.__version__) < Version("1.3.0.dev"): - from pandas.core import integer # type: ignore[unresolved-import] monkeypatch.delattr( - integer._IntegerDtype, "__from_arrow__") + pd.core.arrays.integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") \ - # type: 
ignore[unresolved-attribute] + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() assert len(_get_mgr(result).blocks) == 1 @@ -4476,13 +4467,11 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method # (remove the version added above and the actual version for recent pandas) if Version(pd.__version__) < Version("1.3.0.dev"): - from pandas.core import integer # type: ignore[unresolved-import] monkeypatch.delattr( - integer._IntegerDtype, "__from_arrow__") + pd.core.arrays.integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") \ - # type: ignore[unresolved-attribute] + pd.core.arrays.integer.NumericDtype, "__from_arrow__") result = arr.to_pandas() assert _get_mgr(result).blocks[0].values.dtype == np.dtype("int64") @@ -4661,7 +4650,7 @@ def test_metadata_compat_range_index_pre_0_12(): t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', 'qux']) t1 = t1.replace_schema_metadata({ - b'pandas': json_dumps( + b'pandas': json.dumps( {'index_columns': ['qux'], 'column_indexes': [{'name': None, 'field_name': None, @@ -4690,7 +4679,7 @@ def test_metadata_compat_range_index_pre_0_12(): t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['qux', gen_name_0]) t2 = t2.replace_schema_metadata({ - b'pandas': json_dumps( + b'pandas': json.dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4719,7 +4708,7 @@ def test_metadata_compat_range_index_pre_0_12(): t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', gen_name_0]) t3 = t3.replace_schema_metadata({ - b'pandas': json_dumps( + b'pandas': json.dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4748,7 +4737,7 @@ def test_metadata_compat_range_index_pre_0_12(): t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', 'qux', gen_name_1]) t4 = t4.replace_schema_metadata({ - b'pandas': json_dumps( + b'pandas': json.dumps( {'index_columns': ['qux', gen_name_1], 'column_indexes': [{'name': None, 'field_name': None, @@ -4782,7 +4771,7 @@ def test_metadata_compat_range_index_pre_0_12(): t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', gen_name_0, gen_name_1]) t5 = t5.replace_schema_metadata({ - b'pandas': json_dumps( + b'pandas': json.dumps( {'index_columns': [gen_name_0, gen_name_1], 'column_indexes': [{'name': None, 'field_name': None, @@ -4829,7 +4818,7 @@ def test_metadata_compat_missing_field_name(): # metadata generated by fastparquet 0.3.2 with missing field_names table = table.replace_schema_metadata({ - b'pandas': json_dumps({ + b'pandas': json.dumps({ 'column_indexes': [ {'field_name': None, 'metadata': None, @@ -4971,7 +4960,7 @@ def test_does_not_mutate_timedelta_dtype(): assert np.dtype(np.timedelta64) == expected - df = pd.DataFrame({"a": [np.timedelta64("s")]}) + df = pd.DataFrame({"a": [np.timedelta64()]}) t = pa.Table.from_pandas(df) t.to_pandas() @@ -5126,7 +5115,7 @@ def test_roundtrip_map_array_with_pydicts_duplicate_keys(): # ------------------------ # With maps as pydicts - with pytest.raises(lib.ArrowException): + with pytest.raises(pa.lib.ArrowException): # raises because of duplicate keys maps.to_pandas(maps_as_pydicts="strict") series_pydicts = maps.to_pandas(maps_as_pydicts="lossy") diff --git a/python/pyarrow/tests/test_scalars.py 
b/python/pyarrow/tests/test_scalars.py index 407c69263e8..0f62dd98f82 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -24,7 +24,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa import pyarrow.compute as pc @@ -201,7 +201,7 @@ def test_timestamp_scalar(): assert b == "" c = repr(pa.scalar(datetime.datetime(2015, 1, 1), type=pa.timestamp('us'))) assert c == "" - d = repr(pc.assume_timezone( # type: ignore[unresolved-attribute] + d = repr(pc.assume_timezone( pa.scalar("2000-01-01").cast(pa.timestamp("s")), "America/New_York")) assert d == "" diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 48af7b143ff..a1197ed2d08 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -23,7 +23,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa import pyarrow.tests.util as test_util @@ -627,11 +627,11 @@ def test_type_schema_pickling(pickle_module): pa.union([ pa.field('a', pa.int8()), pa.field('b', pa.int16()) - ], pa.lib.UnionMode_SPARSE), # type: ignore[unresolved-attribute] + ], pa.lib.UnionMode_SPARSE), pa.union([ pa.field('a', pa.int8()), pa.field('b', pa.int16()) - ], pa.lib.UnionMode_DENSE), # type: ignore[unresolved-attribute] + ], pa.lib.UnionMode_DENSE), pa.time32('s'), pa.time64('us'), pa.date32(), diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index 89823e04943..eca8090d77a 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -28,12 +28,15 @@ try: from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix except ImportError: - pytestmark = pytest.mark.scipy + coo_matrix = None + csr_matrix = None + csr_array = None + coo_array = None try: - import sparse # type: ignore[unresolved_import] + import sparse except ImportError: - pytestmark = pytest.mark.pydata_sparse + sparse = None tensor_type_pairs = [ @@ -398,7 +401,7 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): assert np.array_equal(array, result_array) -@pytest.mark.scipy +@pytest.mark.skipif(not coo_matrix, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, @@ -440,7 +443,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, assert out_scipy_matrix.has_canonical_format -@pytest.mark.scipy +@pytest.mark.skipif(not csr_matrix, reason="requires scipy") @pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, @@ -468,7 +471,7 @@ def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, assert np.array_equal(dense_array, sparse_tensor.to_tensor().to_numpy()) -@pytest.mark.pydata_sparse +@pytest.mark.skipif(not sparse, reason="requires pydata/sparse") @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_pydata_sparse_sparse_coo_tensor_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py index 0fe9508aef0..babb839b534 100644 --- a/python/pyarrow/tests/test_strategies.py +++ b/python/pyarrow/tests/test_strategies.py @@ -19,29 +19,29 @@ 
import pytest -from pyarrow import lib # type: ignore[unresolved-attribute] +import pyarrow as pa import pyarrow.tests.strategies as past @h.given(past.all_types) def test_types(ty): - assert isinstance(ty, lib.DataType) + assert isinstance(ty, pa.lib.DataType) @h.given(past.all_fields) def test_fields(field): - assert isinstance(field, lib.Field) + assert isinstance(field, pa.lib.Field) @h.given(past.all_schemas) def test_schemas(schema): - assert isinstance(schema, lib.Schema) + assert isinstance(schema, pa.lib.Schema) @pytest.mark.numpy @h.given(past.all_arrays) def test_arrays(array): - assert isinstance(array, lib.Array) + assert isinstance(array, pa.lib.Array) @pytest.mark.numpy @@ -52,15 +52,15 @@ def test_array_nullability(array): @h.given(past.chunked_arrays(past.primitive_types)) def test_chunked_arrays(chunked_array): - assert isinstance(chunked_array, lib.ChunkedArray) + assert isinstance(chunked_array, pa.lib.ChunkedArray) @h.given(past.all_record_batches) def test_record_batches(record_bath): - assert isinstance(record_bath, lib.RecordBatch) + assert isinstance(record_bath, pa.lib.RecordBatch) @pytest.mark.numpy @h.given(past.all_tables) def test_tables(table): - assert isinstance(table, lib.Table) + assert isinstance(table, pa.lib.Table) diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index 8ac0951e489..fcd1c8d48c5 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -22,16 +22,13 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.compute import equal # type: ignore[unresolved-attribute] -from pyarrow import _substrait # type: ignore[unresolved-attribute] -from pyarrow.lib import tobytes # type: ignore[unresolved_import] -from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError \ - # type: ignore[unresolved_import] +from pyarrow.lib import tobytes +from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError try: import pyarrow.substrait as substrait except ImportError: - pass + substrait = None # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not substrait' @@ -39,7 +36,7 @@ def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] + from pyarrow._compute import _get_udf_context return _get_udf_context(pa.default_memory_pool(), batch_length) @@ -88,7 +85,7 @@ def test_run_serialized_query(tmpdir, use_threads): query = tobytes(substrait_query.replace( "FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri())) - buf = _substrait._parse_json_plan(query) + buf = pa._substrait._parse_json_plan(query) reader = substrait.run_query(buf, use_threads=use_threads) res_tb = reader.read_all() @@ -119,7 +116,7 @@ def test_invalid_plan(): ] } """ - buf = _substrait._parse_json_plan(tobytes(query)) + buf = pa._substrait._parse_json_plan(tobytes(query)) exec_message = "Plan has no relations" with pytest.raises(ArrowInvalid, match=exec_message): substrait.run_query(buf) @@ -165,7 +162,7 @@ def test_binary_conversion_with_json_options(tmpdir, use_threads): path = _write_dummy_data_to_disk(tmpdir, file_name, table) query = tobytes(substrait_query.replace( "FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri())) - buf = _substrait._parse_json_plan(tobytes(query)) + buf = pa._substrait._parse_json_plan(tobytes(query)) reader = substrait.run_query(buf, use_threads=use_threads) res_tb = reader.read_all() @@ -184,7 +181,7 @@ def has_function(fns, ext_file, fn_name): def test_get_supported_functions(): - supported_functions = _substrait.get_supported_functions() + supported_functions = pa._substrait.get_supported_functions() # It probably doesn't make sense to exhaustively verify this list but # we can check a sample aggregate and a sample non-aggregate entry assert has_function(supported_functions, @@ -235,7 +232,7 @@ def table_provider(names, schema): } """ - buf = _substrait._parse_json_plan(tobytes(substrait_query)) + buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -278,7 +275,7 @@ def table_provider(names, _): } """ - buf = _substrait._parse_json_plan(tobytes(substrait_query)) + buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) exec_message = "Invalid NamedTable Source" with pytest.raises(ArrowInvalid, match=exec_message): substrait.run_query(buf, table_provider=table_provider) @@ -320,7 +317,7 @@ def table_provider(names, _): } """ query = tobytes(substrait_query) - buf = _substrait._parse_json_plan(tobytes(query)) + buf = pa._substrait._parse_json_plan(tobytes(query)) exec_message = "names for NamedTable not provided" with pytest.raises(ArrowInvalid, match=exec_message): substrait.run_query(buf, table_provider=table_provider) @@ -439,7 +436,7 @@ def table_provider(names, _): } """ - buf = _substrait._parse_json_plan(substrait_query) + buf = pa._substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -562,7 +559,7 @@ def table_provider(names, _): } """ - buf = _substrait._parse_json_plan(substrait_query) + buf = pa._substrait._parse_json_plan(substrait_query) with pytest.raises(pa.ArrowKeyError) as excinfo: pa.substrait.run_query(buf, table_provider=table_provider) assert "No function registered" in str(excinfo.value) @@ -601,7 +598,7 @@ def table_provider(names, schema): } """ - buf = _substrait._parse_json_plan(tobytes(substrait_query)) + buf = pa._substrait._parse_json_plan(tobytes(substrait_query)) reader = 
pa.substrait.run_query( buf, table_provider=table_provider, use_threads=use_threads) res_tb = reader.read_all() @@ -747,7 +744,7 @@ def table_provider(names, _): ], } """ - buf = _substrait._parse_json_plan(substrait_query) + buf = pa._substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=False) res_tb = reader.read_all() @@ -916,7 +913,7 @@ def table_provider(names, _): ], } """ - buf = _substrait._parse_json_plan(substrait_query) + buf = pa._substrait._parse_json_plan(substrait_query) reader = pa.substrait.run_query( buf, table_provider=table_provider, use_threads=False) res_tb = reader.read_all() @@ -932,8 +929,8 @@ def table_provider(names, _): @pytest.mark.parametrize("expr", [ - equal(pc.field("x"), 7), - equal(pc.field("x"), pc.field("y")), + pc.equal(pc.field("x"), 7), + pc.equal(pc.field("x"), pc.field("y")), pc.field("x") > 50 ]) def test_serializing_expressions(expr): @@ -988,7 +985,7 @@ def test_arrow_one_way_types(): ) def check_one_way(field): - expr = pc.is_null(pc.field(field.name)) # type: ignore[unresolved-attribute] + expr = pc.is_null(pc.field(field.name)) buf = pa.substrait.serialize_expressions([expr], ["test_expr"], schema) returned = pa.substrait.deserialize_expressions(buf) assert alt_schema == returned.schema @@ -1002,8 +999,8 @@ def test_invalid_expression_ser_des(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - expr = equal(pc.field("x"), 7) - bad_expr = equal(pc.field("z"), 7) + expr = pc.equal(pc.field("x"), 7) + bad_expr = pc.equal(pc.field("z"), 7) # Invalid number of names with pytest.raises(ValueError) as excinfo: pa.substrait.serialize_expressions([expr], [], schema) @@ -1022,13 +1019,13 @@ def test_serializing_multiple_expressions(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - exprs = [equal(pc.field("x"), 7), equal(pc.field("x"), pc.field("y"))] + exprs = [pc.equal(pc.field("x"), 7), pc.equal(pc.field("x"), pc.field("y"))] buf = pa.substrait.serialize_expressions(exprs, ["first", "second"], schema) returned = pa.substrait.deserialize_expressions(buf) assert schema == returned.schema assert len(returned.expressions) == 2 - norm_exprs = [equal(pc.field(0), 7), equal(pc.field(0), pc.field(1))] + norm_exprs = [pc.equal(pc.field(0), 7), pc.equal(pc.field(0), pc.field(1))] assert str(returned.expressions["first"]) == str(norm_exprs[0]) assert str(returned.expressions["second"]) == str(norm_exprs[1]) @@ -1038,8 +1035,8 @@ def test_serializing_with_compute(): pa.field("x", pa.int32()), pa.field("y", pa.int32()) ]) - expr = equal(pc.field("x"), 7) - expr_norm = equal(pc.field(0), 7) + expr = pc.equal(pc.field("x"), 7) + expr_norm = pc.equal(pc.field(0), 7) buf = expr.to_substrait(schema) returned = pa.substrait.deserialize_expressions(buf) @@ -1069,7 +1066,7 @@ def test_serializing_udfs(): ]) a = pc.scalar(10) b = pc.scalar(4) - exprs = [pc.shift_left(a, b)] # type: ignore[unresolved-attribute] + exprs = [pc.shift_left(a, b)] with pytest.raises(ArrowNotImplementedError): pa.substrait.serialize_expressions(exprs, ["expr"], schema) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 64624c93f1e..b65fb7d952c 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -23,10 +23,9 @@ try: import numpy as np except ImportError: - pass + np = None import pytest import pyarrow as pa -from pyarrow import lib # type: ignore[unresolved-import] import pyarrow.compute as pc from pyarrow.interchange import 
from_dataframe from pyarrow.vendored.version import Version @@ -50,8 +49,8 @@ def test_chunked_array_basics(): [7, 8, 9] ]) assert isinstance(data.chunks, list) - assert all(isinstance(c, lib.Int64Array) for c in data.chunks) - assert all(isinstance(c, lib.Int64Array) for c in data.iterchunks()) + assert all(isinstance(c, pa.lib.Int64Array) for c in data.chunks) + assert all(isinstance(c, pa.lib.Int64Array) for c in data.iterchunks()) assert len(data.chunks) == 3 assert data.get_total_buffer_size() == sum(c.get_total_buffer_size() for c in data.iterchunks()) @@ -419,7 +418,7 @@ def test_to_pandas_empty_table(): table = pa.table(df) result = table.schema.empty_table().to_pandas() assert result.shape == (0, 2) - tm.assert_frame_equal(result, df.iloc[:0]) # type: ignore[invalid-argument-type] + tm.assert_frame_equal(result, df.iloc[:0]) @pytest.mark.pandas @@ -651,7 +650,7 @@ def __arrow_c_stream__(self, requested_schema=None): # If schema doesn't match, raises NotImplementedError with pytest.raises( - lib.ArrowTypeError, match="Field 0 cannot be cast" + pa.lib.ArrowTypeError, match="Field 0 cannot be cast" ): pa.table( wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))]) @@ -2231,7 +2230,7 @@ def test_invalid_table_construct(): u8 = pa.uint8() arrays = [pa.array(array, type=u8), pa.array(array[1:], type=u8)] - with pytest.raises(lib.ArrowInvalid): + with pytest.raises(pa.lib.ArrowInvalid): pa.Table.from_arrays(arrays, names=["a1", "a2"]) @@ -3300,7 +3299,7 @@ def test_table_join_asof_by_length_mismatch(): }) msg = "inconsistent size of by-key across inputs" - with pytest.raises(lib.ArrowInvalid, match=msg): + with pytest.raises(pa.lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA", "colB"], tolerance=1, right_on="on", right_by=["colA"], @@ -3322,7 +3321,7 @@ def test_table_join_asof_by_type_mismatch(): }) msg = "Expected by-key type int64 but got double for field colA in input 1" - with pytest.raises(lib.ArrowInvalid, match=msg): + with pytest.raises(pa.lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA"], tolerance=1, right_on="on", right_by=["colA"], @@ -3344,7 +3343,7 @@ def test_table_join_asof_on_type_mismatch(): }) msg = "Expected on-key type int64 but got double for field on in input 1" - with pytest.raises(lib.ArrowInvalid, match=msg): + with pytest.raises(pa.lib.ArrowInvalid, match=msg): t1.join_asof( t2, on="on", by=["colA"], tolerance=1, right_on="on", right_by=["colA"], @@ -3471,14 +3470,14 @@ def test_invalid_non_join_column(): }) # check as left table - with pytest.raises(lib.ArrowInvalid) as excinfo: + with pytest.raises(pa.lib.ArrowInvalid) as excinfo: t1.join(t2, 'id', join_type='inner') exp_error_msg = "Data type list is not supported " \ + "in join non-key field array_column" assert exp_error_msg in str(excinfo.value) # check as right table - with pytest.raises(lib.ArrowInvalid) as excinfo: + with pytest.raises(pa.lib.ArrowInvalid) as excinfo: t2.join(t1, 'id', join_type='inner') assert exp_error_msg in str(excinfo.value) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 4077b302f71..e628e559b84 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -27,19 +27,17 @@ try: import hypothesis.extra.pytz as tzst except ImportError: - pass + tzst = None import weakref try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa import pyarrow.types as types import pyarrow.tests.strategies as past -from pyarrow import lib # type: 
ignore[unresolved-import] - def get_many_types(): # returning them from a function is required because of pa.dictionary @@ -85,14 +83,14 @@ def get_many_types(): pa.field('b', pa.int8(), nullable=False), pa.field('c', pa.string())]), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=lib.UnionMode_DENSE), + pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=lib.UnionMode_DENSE, + pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE, type_codes=[4, 8]), pa.union([pa.field('a', pa.binary(10)), - pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), pa.union([pa.field('a', pa.binary(10), nullable=False), - pa.field('b', pa.string())], mode=lib.UnionMode_SPARSE), + pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE), pa.dictionary(pa.int32(), pa.string()), pa.run_end_encoded(pa.int16(), pa.int32()), pa.run_end_encoded(pa.int32(), pa.string()), @@ -249,7 +247,7 @@ def test_is_nested_or_struct(): def test_is_union(): - for mode in [lib.UnionMode_SPARSE, lib.UnionMode_DENSE]: + for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]: assert types.is_union(pa.union([pa.field('a', pa.int32()), pa.field('b', pa.int8()), pa.field('c', pa.string())], @@ -355,7 +353,7 @@ def test_is_primitive(): (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30') ]) def test_tzinfo_to_string(tz, expected): - assert lib.tzinfo_to_string(tz) == expected + assert pa.lib.tzinfo_to_string(tz) == expected def test_pytz_tzinfo_to_string(): @@ -363,13 +361,13 @@ def test_pytz_tzinfo_to_string(): tz = [pytz.utc, pytz.timezone('Europe/Paris')] expected = ['UTC', 'Europe/Paris'] - assert [lib.tzinfo_to_string(i) for i in tz] == expected + assert [pa.lib.tzinfo_to_string(i) for i in tz] == expected # StaticTzInfo.tzname returns with '-09' so we need to infer the timezone's # name from the tzinfo.zone attribute tz = [pytz.timezone('Etc/GMT-9'), pytz.FixedOffset(180)] expected = ['Etc/GMT-9', '+03:00'] - assert [lib.tzinfo_to_string(i) for i in tz] == expected + assert [pa.lib.tzinfo_to_string(i) for i in tz] == expected @pytest.mark.timezone_data @@ -383,9 +381,9 @@ def test_dateutil_tzinfo_to_string(): import dateutil.tz tz = dateutil.tz.UTC - assert lib.tzinfo_to_string(tz) == 'UTC' + assert pa.lib.tzinfo_to_string(tz) == 'UTC' tz = dateutil.tz.gettz('Europe/Paris') - assert lib.tzinfo_to_string(tz) == 'Europe/Paris' + assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' @pytest.mark.timezone_data @@ -397,20 +395,20 @@ def test_zoneinfo_tzinfo_to_string(): pytest.importorskip('tzdata') tz = zoneinfo.ZoneInfo('UTC') - assert lib.tzinfo_to_string(tz) == 'UTC' + assert pa.lib.tzinfo_to_string(tz) == 'UTC' tz = zoneinfo.ZoneInfo('Europe/Paris') - assert lib.tzinfo_to_string(tz) == 'Europe/Paris' + assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' def test_tzinfo_to_string_errors(): msg = "Not an instance of datetime.tzinfo" with pytest.raises(TypeError): - lib.tzinfo_to_string("Europe/Budapest") + pa.lib.tzinfo_to_string("Europe/Budapest") tz = datetime.timezone(datetime.timedelta(hours=1, seconds=30)) msg = "Offset must represent whole number of minutes" with pytest.raises(ValueError, match=msg): - lib.tzinfo_to_string(tz) + pa.lib.tzinfo_to_string(tz) if tzst: @@ -423,8 +421,8 @@ def test_tzinfo_to_string_errors(): def test_pytz_timezone_roundtrip(tz): if tz is None: pytest.skip('requires timezone not None') - 
timezone_string = lib.tzinfo_to_string(tz) - timezone_tzinfo = lib.string_to_tzinfo(timezone_string) + timezone_string = pa.lib.tzinfo_to_string(tz) + timezone_tzinfo = pa.lib.string_to_tzinfo(timezone_string) assert timezone_tzinfo == tz @@ -484,14 +482,14 @@ def tzname(self, dt): def utcoffset(self, dt): return None - assert lib.tzinfo_to_string(CorrectTimezone1()) == "-02:30" - assert lib.tzinfo_to_string(CorrectTimezone2()) == "+03:00" + assert pa.lib.tzinfo_to_string(CorrectTimezone1()) == "-02:30" + assert pa.lib.tzinfo_to_string(CorrectTimezone2()) == "+03:00" msg = (r"Object returned by tzinfo.utcoffset\(None\) is not an instance " r"of datetime.timedelta") for wrong in [BuggyTimezone1(), BuggyTimezone2(), BuggyTimezone3()]: with pytest.raises(ValueError, match=msg): - lib.tzinfo_to_string(wrong) + pa.lib.tzinfo_to_string(wrong) def test_string_to_tzinfo(): @@ -501,7 +499,7 @@ def test_string_to_tzinfo(): expected = [pytz.utc, pytz.timezone('Europe/Paris'), pytz.FixedOffset(180), pytz.FixedOffset(90), pytz.FixedOffset(-120)] - result = [lib.string_to_tzinfo(i) for i in string] + result = [pa.lib.string_to_tzinfo(i) for i in string] assert result == expected except ImportError: @@ -513,7 +511,7 @@ def test_string_to_tzinfo(): datetime.timezone( datetime.timedelta(hours=1, minutes=30)), datetime.timezone(-datetime.timedelta(hours=2))] - result = [lib.string_to_tzinfo(i) for i in string] + result = [pa.lib.string_to_tzinfo(i) for i in string] assert result == expected except ImportError: @@ -527,8 +525,8 @@ def test_timezone_string_roundtrip_pytz(): pytz.utc, pytz.timezone('America/New_York')] name = ['+01:30', '-01:30', 'UTC', 'America/New_York'] - assert [lib.tzinfo_to_string(i) for i in tz] == name - assert [lib.string_to_tzinfo(i)for i in name] == tz + assert [pa.lib.tzinfo_to_string(i) for i in tz] == name + assert [pa.lib.string_to_tzinfo(i)for i in name] == tz def test_timestamp(): @@ -799,13 +797,13 @@ def check_fields(ty, fields): sparse_factories = [ partial(pa.union, mode='sparse'), - partial(pa.union, mode=lib.UnionMode_SPARSE), + partial(pa.union, mode=pa.lib.UnionMode_SPARSE), pa.sparse_union, ] dense_factories = [ partial(pa.union, mode='dense'), - partial(pa.union, mode=lib.UnionMode_DENSE), + partial(pa.union, mode=pa.lib.UnionMode_DENSE), pa.dense_union, ] @@ -1324,7 +1322,6 @@ def test_field_modified_copies(): assert f0.equals(f0_) -@pytest.mark.numpy def test_is_integer_value(): assert pa.types.is_integer_value(1) if np is not None: diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py index aed2fbceaeb..93004a30618 100644 --- a/python/pyarrow/tests/test_udf.py +++ b/python/pyarrow/tests/test_udf.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - pass + np = None import pyarrow as pa from pyarrow import compute as pc @@ -35,11 +35,11 @@ try: import pyarrow.dataset as ds except ImportError: - pass + ds = None def mock_udf_context(batch_length=10): - from pyarrow._compute import _get_udf_context # type: ignore[unresolved_import] + from pyarrow._compute import _get_udf_context return _get_udf_context(pa.default_memory_pool(), batch_length) diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index 7eefd067807..d8b250ffff0 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -171,8 +171,7 @@ def get_modified_env_with_pythonpath(): existing_pythonpath = env.get('PYTHONPATH', '') module_path = os.path.abspath( - os.path.dirname(os.path.dirname(pa.__file__))) \ - # type: 
ignore[no-matching-overload] + os.path.dirname(os.path.dirname(pa.__file__))) if existing_pythonpath: new_pythonpath = os.pathsep.join((module_path, existing_pythonpath)) diff --git a/python/pyarrow/tests/wsgi_examples.py b/python/pyarrow/tests/wsgi_examples.py index 1fafa852dc6..440b107abe5 100644 --- a/python/pyarrow/tests/wsgi_examples.py +++ b/python/pyarrow/tests/wsgi_examples.py @@ -28,7 +28,7 @@ def application(env, start_response): # See test_fs::test_uwsgi_integration start_response('200 OK', [('Content-Type', 'text/html')]) # flake8: noqa - fs = pyarrow.fs.S3FileSystem() # type: ignore[possibly-unbound-attribute] + fs = pyarrow.fs.S3FileSystem() return [b"Hello World\n"] else: start_response('404 Not Found', [('Content-Type', 'text/html')]) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9c16ee08892..94a0d9a6b4d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -118,3 +118,6 @@ fallback_version = '22.0.0a0' #unsupported-operator = "ignore" #missing-argument = "ignore" #call-non-callable = "ignore" + +[tool.ty.src] +exclude = ["pyarrow/tests"] From 89e2e7593c59e6cea5d82ec807e6ca2e3de7a950 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 26 Jul 2025 18:31:47 +0200 Subject: [PATCH 32/32] Converging --- .github/workflows/python.yml | 3 +-- python/pyarrow/__init__.py | 7 ++++--- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/_dataset.pyx | 4 ++-- python/pyarrow/_dataset_parquet.pyx | 2 +- python/pyarrow/_substrait.pyx | 2 +- python/pyarrow/cffi.py | 2 +- python/pyarrow/conftest.py | 12 ++++++------ python/pyarrow/fs.py | 2 +- python/pyarrow/lib.pyx | 2 +- python/pyarrow/pandas_compat.py | 4 ++-- python/pyarrow/util.py | 2 +- python/pyarrow/vendored/docscrape.py | 2 +- python/pyproject.toml | 22 +--------------------- 14 files changed, 24 insertions(+), 44 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 15906a10ac0..cb342b132d2 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -139,10 +139,9 @@ jobs: run: archery docker push ${{ matrix.image }} - name: Type check with ty - working-directory: python run: |- python -m pip install ty - python -m ty check + pushd python; python -m ty check macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 2b96edee84e..545c68c72e7 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -37,11 +37,12 @@ import warnings as _warnings try: - from ._generated_version import version as __version__ + from ._generated_version import version as __version__ \ + # type: ignore[unresolved-import] except ImportError: # Package is not installed, parse git tag at runtime try: - import setuptools_scm + import setuptools_scm # type: ignore[unresolved-import] # Code duplicated from setup.py to avoid a dependency on each other def parse_git(root, **kwargs): @@ -49,7 +50,7 @@ def parse_git(root, **kwargs): Parse function for setuptools_scm that ignores tags for non-C++ subprojects, e.g. apache-arrow-js-XXX tags. 
""" - from setuptools_scm.git import parse + from setuptools_scm.git import parse # type: ignore[unresolved-import] kwargs['describe_command'] = \ "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'" return parse(root, **kwargs) # type: ignore[missing-argument] diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index ad0b116fdc6..59fd775b5ac 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -36,7 +36,7 @@ import inspect try: import numpy as np except ImportError: - pass + np = None import warnings diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index abfd011fa21..d279881d15f 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -42,7 +42,7 @@ from pyarrow._json cimport ReadOptions as JsonReadOptions try: import pyarrow.substrait as pa_substrait except ImportError: - pass + pa_substrait = None _DEFAULT_BATCH_SIZE = 2**17 @@ -89,7 +89,7 @@ def _get_parquet_classes(): try: import pyarrow._dataset_parquet as _dataset_pq except ImportError: - pass + _dataset_pq = None def _get_parquet_symbol(name): diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index e17867426dc..9405b5d8c54 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -59,7 +59,7 @@ try: ) parquet_encryption_enabled = True except ImportError: - pass + parquet_encryption_enabled = False cdef Expression _true = Expression._scalar(True) diff --git a/python/pyarrow/_substrait.pyx b/python/pyarrow/_substrait.pyx index b317ba1e639..d9359c8e77d 100644 --- a/python/pyarrow/_substrait.pyx +++ b/python/pyarrow/_substrait.pyx @@ -29,7 +29,7 @@ from pyarrow.includes.libarrow_substrait cimport * try: import substrait as py_substrait except ImportError: - pass + py_substrait = None else: import substrait.proto # no-cython-lint diff --git a/python/pyarrow/cffi.py b/python/pyarrow/cffi.py index 3f5e748daf4..3ac74d6b17b 100644 --- a/python/pyarrow/cffi.py +++ b/python/pyarrow/cffi.py @@ -17,7 +17,7 @@ from __future__ import absolute_import -from cffi import FFI +from cffi import FFI # type: ignore[unresolved-import] c_source = """ struct ArrowSchema { diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 563c98bfdc8..0c5f453fd13 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import pytest +import pytest # type: ignore[unresolved-import] import os import pyarrow as pa @@ -114,7 +114,7 @@ defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo") try: - import cython # noqa + import cython # type: ignore[unresolved-import] # noqa defaults['cython'] = True except ImportError: pass @@ -156,13 +156,13 @@ pass try: - import pandas # noqa + import pandas # type: ignore[unresolved-import] # noqa defaults['pandas'] = True except ImportError: defaults['nopandas'] = True try: - import numpy # noqa + import numpy # type: ignore[unresolved-import] # noqa defaults['numpy'] = True except ImportError: defaults['nonumpy'] = True @@ -336,7 +336,7 @@ def unary_agg_func_fixture(): Register a unary aggregate function (mean) """ from pyarrow import compute as pc - import numpy as np + import numpy as np # type: ignore[unresolved-import] def func(ctx, x): return pa.scalar(np.nanmean(x)) @@ -362,7 +362,7 @@ def varargs_agg_func_fixture(): Register a unary aggregate function """ from pyarrow import compute as pc - import numpy as np + import numpy as np # type: ignore[unresolved-import] def func(ctx, *args): sum = 0.0 diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index c7f1b325c70..11fc0697c9d 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -111,7 +111,7 @@ def _ensure_filesystem(filesystem, *, use_mmap=False): else: # handle fsspec-compatible filesystems try: - import fsspec + import fsspec # type: ignore[unresolved-import] except ImportError: pass else: diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 2da25a570ae..5dca6fd8d2e 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -24,7 +24,7 @@ import decimal as _pydecimal try: import numpy as np except ImportError: - pass + np = None import os import sys diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 970126da64c..f4f79edc8bf 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -31,7 +31,7 @@ import warnings try: - import numpy as np + import numpy as np # type: ignore[unresolved-import] except ImportError: pass @@ -825,7 +825,7 @@ def table_to_dataframe( else: from pandas.core.internals import BlockManager \ # type: ignore[unresolved_import] - from pandas import DataFrame + from pandas import DataFrame # type: ignore[unresolved-import] blocks = [ _reconstruct_block(item, column_names, ext_columns_dtypes) diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 5878d1f9026..5947da95b7b 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -238,7 +238,7 @@ def _download_urllib(url, out_path): def _download_requests(url, out_path): - import requests + import requests # type: ignore[unresolved-import] with requests.get(url) as response: with open(out_path, 'wb') as f: f.write(response.content) diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py index 096ef245243..3fba2524a45 100644 --- a/python/pyarrow/vendored/docscrape.py +++ b/python/pyarrow/vendored/docscrape.py @@ -622,7 +622,7 @@ def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc, self._cls = cls if 'sphinx' in sys.modules: - from sphinx.ext.autodoc import ALL + from sphinx.ext.autodoc import ALL # type: ignore[unresolved-import] else: ALL = object() diff --git a/python/pyproject.toml b/python/pyproject.toml index 94a0d9a6b4d..c5cdca80b2e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -98,26 +98,6 @@ version_scheme = 'guess-next-dev' 
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' fallback_version = '22.0.0a0' -[tool.ty.rules] -#invalid-argument-type = "ignore" -#invalid-assignment = "ignore" -#invalid-context-manager = "ignore" -#invalid-return-type = "ignore" -#invalid-type-form = "ignore" -#no-matching-overload = "ignore" -#non-subscriptable = "ignore" -#not-iterable = "ignore" -#possibly-unbound-attribute = "ignore" -#possibly-unbound-import = "ignore" -#too-many-positional-arguments = "ignore" -#unknown-argument = "ignore" -#unresolved-attribute = "ignore" -#unresolved-global = "ignore" -#unresolved-import = "ignore" -#unresolved-reference = "ignore" -#unsupported-operator = "ignore" -#missing-argument = "ignore" -#call-non-callable = "ignore" - [tool.ty.src] exclude = ["pyarrow/tests"] +include = ["pyarrow"]
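
Note on the recurring pattern in the test diffs above: optional dependencies (numpy, scipy, pydata sparse, pyarrow.substrait, pyarrow.dataset, Flight) are bound to None when their import fails, and the dependent tests are gated with pytest skip marks instead of per-line type-ignore comments; once `[tool.ty.src]` excludes `pyarrow/tests` (and the final patch narrows the checked sources to `include = ["pyarrow"]`), those ignores are no longer needed in test code. The following is a minimal sketch of that pattern, not part of the patch; the test name and values are illustrative only:

    import pytest

    try:
        import numpy as np
    except ImportError:
        np = None


    @pytest.mark.skipif(np is None, reason="requires numpy")
    def test_int64_roundtrip_with_numpy():
        import pyarrow as pa

        # Build an Arrow array from a numpy array and check the values survive.
        arr = pa.array(np.arange(3, dtype="int64"))
        assert arr.to_pylist() == [0, 1, 2]

The same shape is used for module-level objects in the diffs (for example `sparse = None` with `pytest.mark.skipif(not sparse, ...)` in test_sparse_tensor.py), keeping the skip decision at collection time rather than silencing the type checker inside the test body.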