diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index bc3b9e8c558..453f487c4de 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -2345,6 +2345,15 @@ cdef class Expression(_Weakrefable):
         return self.expr

     def equals(self, Expression other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.Expression
+
+        Returns
+        -------
+        bool
+        """
         return self.expr.Equals(other.unwrap())

     def __str__(self):
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 0b72f5249f9..e532d8d8ab2 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -290,6 +290,15 @@ cdef class ReadOptions(_Weakrefable):
         check_status(deref(self.options).Validate())

     def equals(self, ReadOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.csv.ReadOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.use_threads == other.use_threads and
             self.block_size == other.block_size and
@@ -536,6 +545,15 @@ cdef class ParseOptions(_Weakrefable):
         check_status(deref(self.options).Validate())

     def equals(self, ParseOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.csv.ParseOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.delimiter == other.delimiter and
             self.quote_char == other.quote_char and
@@ -1042,6 +1060,15 @@ cdef class ConvertOptions(_Weakrefable):
         check_status(deref(self.options).Validate())

     def equals(self, ConvertOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.csv.ConvertOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.check_utf8 == other.check_utf8 and
             self.column_types == other.column_types and
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index badf6e4a4c5..8f5688de290 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1992,9 +1992,27 @@ cdef class IpcFileFormat(FileFormat):
         self.init(shared_ptr[CFileFormat](new CIpcFileFormat()))

     def equals(self, IpcFileFormat other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.IpcFileFormat
+
+        Returns
+        -------
+        bool
+        """
         return True

     def make_write_options(self, **kwargs):
+        """
+        Parameters
+        ----------
+        **kwargs : dict
+
+        Returns
+        -------
+        pyarrow.ipc.IpcWriteOptions
+        """
         cdef IpcFileWriteOptions opts = \
             <IpcFileWriteOptions> FileFormat.make_write_options(self)
         opts.write_options = IpcWriteOptions(**kwargs)
@@ -2071,6 +2089,15 @@ cdef class CsvFileFormat(FileFormat):
         self.csv_format = <CCsvFileFormat*> sp.get()

     def make_write_options(self, **kwargs):
+        """
+        Parameters
+        ----------
+        **kwargs : dict
+
+        Returns
+        -------
+        pyarrow.csv.WriteOptions
+        """
         cdef CsvFileWriteOptions opts = \
             <CsvFileWriteOptions> FileFormat.make_write_options(self)
         opts.write_options = WriteOptions(**kwargs)
@@ -2093,6 +2120,15 @@ cdef class CsvFileFormat(FileFormat):
         super()._set_default_fragment_scan_options(options)

     def equals(self, CsvFileFormat other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.CsvFileFormat
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.parse_options.equals(other.parse_options) and
             self.default_fragment_scan_options ==
@@ -2165,6 +2201,15 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
                 make_streamwrap_func(read_options.encoding, 'utf-8'))

     def equals(self, CsvFragmentScanOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.CsvFragmentScanOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             other and
             self.convert_options.equals(other.convert_options) and
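The `equals()` methods documented above do a plain field-by-field comparison. A minimal usage sketch (not part of the patch; option values are illustrative):

```python
import pyarrow.csv as csv

a = csv.ReadOptions(block_size=2 ** 20)
b = csv.ReadOptions(block_size=2 ** 20)
c = csv.ReadOptions(block_size=2 ** 10)

print(a.equals(b))  # True: every documented option field matches
print(a.equals(c))  # False: block_size differs
```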
@@ -2250,6 +2295,15 @@ cdef class JsonFileFormat(FileFormat):
         super()._set_default_fragment_scan_options(options)

     def equals(self, JsonFileFormat other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.JsonFileFormat
+
+        Returns
+        -------
+        bool
+        """
         return (other and
                 self.default_fragment_scan_options ==
                 other.default_fragment_scan_options)
@@ -2308,6 +2362,15 @@ cdef class JsonFragmentScanOptions(FragmentScanOptions):
         self.json_options.read_options = read_options.options

     def equals(self, JsonFragmentScanOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.JsonFragmentScanOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             other and
             self.read_options.equals(other.read_options) and
@@ -2353,6 +2416,17 @@ cdef class Partitioning(_Weakrefable):
         return False

     def parse(self, path):
+        """
+        Parse a path into a partition expression.
+
+        Parameters
+        ----------
+        path : str
+
+        Returns
+        -------
+        pyarrow.dataset.Expression
+        """
         cdef CResult[CExpression] result
         result = self.partitioning.Parse(tobytes(path))
         return Expression.wrap(GetResultValue(result))
diff --git a/python/pyarrow/_dataset_orc.pyx b/python/pyarrow/_dataset_orc.pyx
index 40a21ef5462..a8cce336222 100644
--- a/python/pyarrow/_dataset_orc.pyx
+++ b/python/pyarrow/_dataset_orc.pyx
@@ -32,6 +32,15 @@ cdef class OrcFileFormat(FileFormat):
         self.init(shared_ptr[CFileFormat](new COrcFileFormat()))

     def equals(self, OrcFileFormat other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.OrcFileFormat
+
+        Returns
+        -------
+        bool
+        """
         return True

     @property
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index 4ad0caec307..4de396f4f50 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -178,6 +178,15 @@ cdef class ParquetFileFormat(FileFormat):
         return parquet_read_options

     def make_write_options(self, **kwargs):
+        """
+        Parameters
+        ----------
+        **kwargs : dict
+
+        Returns
+        -------
+        pyarrow.dataset.FileWriteOptions
+        """
         opts = FileFormat.make_write_options(self)
         (<ParquetFileWriteOptions> opts).update(**kwargs)
         return opts
@@ -189,6 +198,15 @@ cdef class ParquetFileFormat(FileFormat):
         super()._set_default_fragment_scan_options(options)

     def equals(self, ParquetFileFormat other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetFileFormat
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.read_options.equals(other.read_options) and
             self.default_fragment_scan_options ==
@@ -502,6 +520,15 @@ cdef class ParquetReadOptions(_Weakrefable):
             self._coerce_int96_timestamp_unit = TimeUnit_NANO

     def equals(self, ParquetReadOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetReadOptions
+
+        Returns
+        -------
+        bool
+        """
         return (self.dictionary_columns == other.dictionary_columns and
                 self.coerce_int96_timestamp_unit ==
                 other.coerce_int96_timestamp_unit)
@@ -527,6 +554,11 @@ cdef class ParquetFileWriteOptions(FileWriteOptions):
         object _properties

     def update(self, **kwargs):
+        """
+        Parameters
+        ----------
+        **kwargs : dict
+        """
         arrow_fields = {
             "use_deprecated_int96_timestamps",
             "coerce_timestamps",
@@ -720,6 +752,15 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
         self.reader_properties().set_thrift_container_size_limit(size)

     def equals(self, ParquetFragmentScanOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetFragmentScanOptions
+
+        Returns
+        -------
+        bool
+        """
         attrs = (
             self.use_buffered_stream, self.buffer_size, self.pre_buffer,
             self.thrift_string_size_limit, self.thrift_container_size_limit)
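For `Partitioning.parse()` above, a short usage sketch may help (not from the patch; the schema and path are illustrative):

```python
import pyarrow as pa
import pyarrow.dataset as ds

# Hive-style partitioning over a hypothetical int32 "year" field.
part = ds.partitioning(pa.schema([("year", pa.int32())]), flavor="hive")

# parse() turns a partition path segment into a dataset Expression.
expr = part.parse("/year=2023")
print(expr)  # equivalent to: ds.field("year") == 2023
```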
diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx
index 69105afc2fc..dbd7ebe5e4d 100644
--- a/python/pyarrow/_fs.pyx
+++ b/python/pyarrow/_fs.pyx
@@ -505,6 +505,15 @@ cdef class FileSystem(_Weakrefable):
         return self.wrapped

     def equals(self, FileSystem other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.fs.FileSystem
+
+        Returns
+        -------
+        bool
+        """
         return self.fs.Equals(other.unwrap())

     def __eq__(self, other):
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
index 70cde6e23fe..d36dad67abb 100644
--- a/python/pyarrow/_json.pyx
+++ b/python/pyarrow/_json.pyx
@@ -83,6 +83,15 @@ cdef class ReadOptions(_Weakrefable):
         )

     def equals(self, ReadOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.json.ReadOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.use_threads == other.use_threads and
             self.block_size == other.block_size
@@ -212,6 +221,15 @@ cdef class ParseOptions(_Weakrefable):
         self.options.unexpected_field_behavior = v

     def equals(self, ParseOptions other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.json.ParseOptions
+
+        Returns
+        -------
+        bool
+        """
         return (
             self.explicit_schema == other.explicit_schema and
             self.newlines_in_values == other.newlines_in_values and
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 4448f359ac1..50b4ed8e86e 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -1183,6 +1183,22 @@ cdef class ParquetReader(_Weakrefable):
              FileDecryptionProperties decryption_properties=None,
              thrift_string_size_limit=None,
              thrift_container_size_limit=None):
+        """
+        Open a parquet file for reading.
+
+        Parameters
+        ----------
+        source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
+        use_memory_map : bool, default False
+        read_dictionary : iterable[int or str], optional
+        metadata : FileMetaData, optional
+        buffer_size : int, default 0
+        pre_buffer : bool, default False
+        coerce_int96_timestamp_unit : str, optional
+        decryption_properties : FileDecryptionProperties, optional
+        thrift_string_size_limit : int, optional
+        thrift_container_size_limit : int, optional
+        """
         cdef:
             shared_ptr[CFileMetaData] c_metadata
             CReaderProperties properties = default_reader_properties()
@@ -1285,13 +1301,35 @@ cdef class ParquetReader(_Weakrefable):
         return self.reader.get().num_row_groups()

     def set_use_threads(self, bint use_threads):
+        """
+        Parameters
+        ----------
+        use_threads : bool
+        """
         self.reader.get().set_use_threads(use_threads)

     def set_batch_size(self, int64_t batch_size):
+        """
+        Parameters
+        ----------
+        batch_size : int64
+        """
         self.reader.get().set_batch_size(batch_size)

     def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
                      bint use_threads=True):
+        """
+        Parameters
+        ----------
+        batch_size : int64
+        row_groups : list[int]
+        column_indices : list[int], optional
+        use_threads : bool, default True
+
+        Yields
+        ------
+        batch : RecordBatch
+        """
         cdef:
             vector[int] c_row_groups
             vector[int] c_column_indices
@@ -1336,10 +1374,32 @@ cdef class ParquetReader(_Weakrefable):

     def read_row_group(self, int i, column_indices=None,
                        bint use_threads=True):
+        """
+        Parameters
+        ----------
+        i : int
+        column_indices : list[int], optional
+        use_threads : bool, default True
+
+        Returns
+        -------
+        table : pyarrow.Table
+        """
         return self.read_row_groups([i], column_indices, use_threads)

     def read_row_groups(self, row_groups not None, column_indices=None,
                         bint use_threads=True):
+        """
+        Parameters
+        ----------
+        row_groups : list[int]
+        column_indices : list[int], optional
+        use_threads : bool, default True
+
+        Returns
+        -------
+        table : pyarrow.Table
+        """
         cdef:
             shared_ptr[CTable] ctable
             vector[int] c_row_groups
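`ParquetReader` is the low-level reader behind `pyarrow.parquet.ParquetFile`, which exposes the same batch iteration. A self-contained sketch (the file name is a placeholder):

```python
import pyarrow as pa
import pyarrow.parquet as pq

pq.write_table(pa.table({"x": list(range(10))}), "example.parquet")

f = pq.ParquetFile("example.parquet")
for batch in f.iter_batches(batch_size=4):
    print(batch.num_rows)  # 4, 4, 2
```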
@@ -1366,6 +1426,16 @@ cdef class ParquetReader(_Weakrefable):
         return pyarrow_wrap_table(ctable)

     def read_all(self, column_indices=None, bint use_threads=True):
+        """
+        Parameters
+        ----------
+        column_indices : list[int], optional
+        use_threads : bool, default True
+
+        Returns
+        -------
+        table : pyarrow.Table
+        """
         cdef:
             shared_ptr[CTable] ctable
             vector[int] c_column_indices
@@ -1387,6 +1457,16 @@ cdef class ParquetReader(_Weakrefable):
         return pyarrow_wrap_table(ctable)

     def scan_contents(self, column_indices=None, batch_size=65536):
+        """
+        Parameters
+        ----------
+        column_indices : list[int], optional
+        batch_size : int32, default 65536
+
+        Returns
+        -------
+        num_rows : int64
+        """
         cdef:
             vector[int] c_column_indices
             int32_t c_batch_size
@@ -1434,6 +1514,18 @@ cdef class ParquetReader(_Weakrefable):
             return self._column_idx_map[tobytes(column_name)]

     def read_column(self, int column_index):
+        """
+        Read the column at the specified index.
+
+        Parameters
+        ----------
+        column_index : int
+            Index of the column.
+
+        Returns
+        -------
+        column : pyarrow.ChunkedArray
+        """
         cdef shared_ptr[CChunkedArray] out
         with nogil:
             check_status(self.reader.get()
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2f8959cd721..ce4eafd8e30 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1265,6 +1265,17 @@ cdef class Array(_PandasConvertible):
         return frombytes(result, safe=True)

     def format(self, **kwargs):
+        """
+        DEPRECATED, use pyarrow.Array.to_string
+
+        Parameters
+        ----------
+        **kwargs : dict
+
+        Returns
+        -------
+        str
+        """
         import warnings
         warnings.warn('Array.format is deprecated, use Array.to_string')
         return self.to_string(**kwargs)
@@ -1281,6 +1292,15 @@ cdef class Array(_PandasConvertible):
             return NotImplemented

     def equals(Array self, Array other not None):
+        """
+        Parameters
+        ----------
+        other : pyarrow.Array
+
+        Returns
+        -------
+        bool
+        """
         return self.ap.Equals(deref(other.ap))

     def __len__(self):
@@ -2336,6 +2356,19 @@ cdef class UnionArray(Array):
     """

     def child(self, int pos):
+        """
+        DEPRECATED, use field() instead.
+
+        Parameters
+        ----------
+        pos : int
+            The physical index of the union child field (not its type code).
+
+        Returns
+        -------
+        field : pyarrow.Field
+            The given child field.
+        """
         import warnings
         warnings.warn("child is deprecated, use field", FutureWarning)
         return self.field(pos)
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index cc46bc760ff..35bbf5018f0 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -191,6 +191,19 @@ cdef class Projector(_Weakrefable):
         return self.projector.get().DumpIR().decode()

     def evaluate(self, RecordBatch batch, SelectionVector selection=None):
+        """
+        Evaluate the specified record batch and return the arrays at the
+        filtered positions.
+
+        Parameters
+        ----------
+        batch : pyarrow.RecordBatch
+        selection : pyarrow.gandiva.SelectionVector, optional
+
+        Returns
+        -------
+        list[pyarrow.Array]
+        """
         cdef vector[shared_ptr[CArray]] results
         if selection is None:
             check_status(self.projector.get().Evaluate(
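`Array.equals()` above compares both type and values, which is stricter than element-wise `==`. A quick illustration (not from the patch):

```python
import pyarrow as pa

a = pa.array([1, 2, 3])  # inferred as int64
b = pa.array([1, 2, 3])

print(a.equals(b))                    # True: same type and values
print(a.equals(a.cast(pa.int32())))   # False: the types differ
```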
@@ -227,6 +240,19 @@ cdef class Filter(_Weakrefable):
         return self.filter.get().DumpIR().decode()

     def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'):
+        """
+        Evaluate the specified record batch and return a selection vector.
+
+        Parameters
+        ----------
+        batch : pyarrow.RecordBatch
+        pool : MemoryPool
+        dtype : DataType or str, default int32
+
+        Returns
+        -------
+        pyarrow.gandiva.SelectionVector
+        """
         cdef:
             DataType type = ensure_type(dtype)
             shared_ptr[CSelectionVector] selection
@@ -252,6 +278,18 @@ cdef class Filter(_Weakrefable):

 cdef class TreeExprBuilder(_Weakrefable):

     def make_literal(self, value, dtype):
+        """
+        Create a node from a literal value.
+
+        Parameters
+        ----------
+        value : a literal value
+        dtype : DataType
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef:
             DataType type = ensure_type(dtype)
             shared_ptr[CNode] r
@@ -289,6 +327,19 @@ cdef class TreeExprBuilder(_Weakrefable):

     def make_expression(self, Node root_node not None,
                         Field return_field not None):
+        """
+        Create an expression with the specified root node, writing its
+        result to return_field.
+
+        Parameters
+        ----------
+        root_node : pyarrow.gandiva.Node
+        return_field : pyarrow.Field
+
+        Returns
+        -------
+        pyarrow.gandiva.Expression
+        """
         cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
             root_node.node, return_field.sp_field)
         cdef Expression expression = Expression()
@@ -296,6 +347,19 @@ cdef class TreeExprBuilder(_Weakrefable):
         return expression

     def make_function(self, name, children, DataType return_type):
+        """
+        Create a node with a function.
+
+        Parameters
+        ----------
+        name : str
+        children : pyarrow.gandiva.NodeVector
+        return_type : DataType
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
@@ -307,17 +371,53 @@ cdef class TreeExprBuilder(_Weakrefable):
         return Node.create(r)

     def make_field(self, Field field not None):
+        """
+        Create a node with an Arrow field.
+
+        Parameters
+        ----------
+        field : pyarrow.Field
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
         return Node.create(r)

     def make_if(self, Node condition not None, Node this_node not None,
                 Node else_node not None, DataType return_type not None):
+        """
+        Create a node with an if-else expression.
+
+        Parameters
+        ----------
+        condition : pyarrow.gandiva.Node
+        this_node : pyarrow.gandiva.Node
+        else_node : pyarrow.gandiva.Node
+        return_type : DataType
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
             condition.node, this_node.node, else_node.node,
             return_type.sp_type)
         return Node.create(r)

     def make_and(self, children):
+        """
+        Create a node with a boolean AND expression.
+
+        Parameters
+        ----------
+        children : list[pyarrow.gandiva.Node]
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
@@ -328,6 +428,17 @@ cdef class TreeExprBuilder(_Weakrefable):
         return Node.create(r)

     def make_or(self, children):
+        """
+        Create a node with a boolean OR expression.
+
+        Parameters
+        ----------
+        children : list[pyarrow.gandiva.Node]
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
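A sketch tying the TreeExprBuilder methods above together, assuming pyarrow was built with Gandiva (the `pyarrow.gandiva` module is optional) and using illustrative field names:

```python
import pyarrow as pa
import pyarrow.gandiva as gandiva

builder = gandiva.TreeExprBuilder()
field = pa.field("x", pa.int64())

# Build the tree for: x + 2, projected into a field named "x_plus_two".
node_x = builder.make_field(field)
node_two = builder.make_literal(2, pa.int64())
add = builder.make_function("add", [node_x, node_two], pa.int64())
expr = builder.make_expression(add, pa.field("x_plus_two", pa.int64()))

projector = gandiva.make_projector(
    pa.schema([field]), [expr], pa.default_memory_pool())
batch = pa.record_batch([pa.array([1, 2, 3])], schema=pa.schema([field]))
print(projector.evaluate(batch)[0])  # [3, 4, 5]
```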
@@ -420,6 +531,19 @@ cdef class TreeExprBuilder(_Weakrefable):
         return Node.create(r)

     def make_in_expression(self, Node node not None, values, dtype):
+        """
+        Create a node with an IN expression.
+
+        Parameters
+        ----------
+        node : pyarrow.gandiva.Node
+        values : iterable
+        dtype : DataType
+
+        Returns
+        -------
+        pyarrow.gandiva.Node
+        """
         cdef DataType type = ensure_type(dtype)

         if type.id == _Type_INT32:
@@ -444,6 +568,17 @@ cdef class TreeExprBuilder(_Weakrefable):
             raise TypeError("Data type " + str(dtype) + " not supported.")

     def make_condition(self, Node condition not None):
+        """
+        Create a condition with the specified node.
+
+        Parameters
+        ----------
+        condition : pyarrow.gandiva.Node
+
+        Returns
+        -------
+        pyarrow.gandiva.Condition
+        """
         cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
             condition.node)
         return Condition.create(r)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 2a78f7e7954..e3018ab4704 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -575,6 +575,14 @@ cdef class NativeFile(_Weakrefable):
         return line

     def read_buffer(self, nbytes=None):
+        """
+        Read from the file and return the data as a Buffer.
+
+        Parameters
+        ----------
+        nbytes : int, optional
+            Maximum number of bytes to read.
+        """
         cdef:
             int64_t c_nbytes
             int64_t bytes_read = 0
@@ -602,6 +610,14 @@ cdef class NativeFile(_Weakrefable):
             raise UnsupportedOperation()

     def writelines(self, lines):
+        """
+        Write lines to the file.
+
+        Parameters
+        ----------
+        lines : iterable
+            Iterable of bytes-like objects or objects exporting the buffer protocol.
+        """
         self._assert_writable()

         for line in lines:
@@ -865,12 +881,35 @@ cdef class PythonFile(NativeFile):
             self.is_writable = True

     def truncate(self, pos=None):
+        """
+        Parameters
+        ----------
+        pos : int, optional
+        """
         self.handle.truncate(pos)

     def readline(self, size=None):
+        """
+        Read and return a line of bytes from the file.
+
+        If size is specified, read at most size bytes.
+
+        Parameters
+        ----------
+        size : int, optional
+            Maximum number of bytes to read.
+        """
         return self.handle.readline(size)

     def readlines(self, hint=None):
+        """
+        Read lines from the file.
+
+        Parameters
+        ----------
+        hint : int, optional
+            Maximum number of bytes to read before stopping.
+        """
         return self.handle.readlines(hint)
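The NativeFile helpers above can be exercised with in-memory streams, which keeps the example self-contained (not from the patch):

```python
import pyarrow as pa

sink = pa.BufferOutputStream()
sink.writelines([b"alpha\n", b"beta\n"])  # any iterable of bytes-like objects
buf = sink.getvalue()

source = pa.BufferReader(buf)
print(source.read_buffer(5).to_pybytes())  # b'alpha'
```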
@@ -1146,16 +1185,31 @@ cdef class FixedSizeBufferWriter(NativeFile):
         self.is_writable = True

     def set_memcopy_threads(self, int num_threads):
+        """
+        Parameters
+        ----------
+        num_threads : int
+        """
         cdef CFixedSizeBufferWriter* writer = \
             <CFixedSizeBufferWriter*> self.output_stream.get()
         writer.set_memcopy_threads(num_threads)

     def set_memcopy_blocksize(self, int64_t blocksize):
+        """
+        Parameters
+        ----------
+        blocksize : int64
+        """
         cdef CFixedSizeBufferWriter* writer = \
             <CFixedSizeBufferWriter*> self.output_stream.get()
         writer.set_memcopy_blocksize(blocksize)

     def set_memcopy_threshold(self, int64_t threshold):
+        """
+        Parameters
+        ----------
+        threshold : int64
+        """
         cdef CFixedSizeBufferWriter* writer = \
             <CFixedSizeBufferWriter*> self.output_stream.get()
         writer.set_memcopy_threshold(threshold)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index aff1c311abb..e19807ba56e 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -123,6 +123,15 @@ cdef class Scalar(_Weakrefable):
         return str(self.as_py())

     def equals(self, Scalar other not None):
+        """
+        Parameters
+        ----------
+        other : pyarrow.Scalar
+
+        Returns
+        -------
+        bool
+        """
         return self.wrapped.get().Equals(other.unwrap().get()[0])

     def __eq__(self, other):
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f08162089b8..2eae38485dc 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -160,6 +160,17 @@ cdef class ChunkedArray(_PandasConvertible):
         return frombytes(result, safe=True)

     def format(self, **kwargs):
+        """
+        DEPRECATED, use pyarrow.ChunkedArray.to_string
+
+        Parameters
+        ----------
+        **kwargs : dict
+
+        Returns
+        -------
+        str
+        """
         import warnings
         warnings.warn('ChunkedArray.format is deprecated, '
                       'use ChunkedArray.to_string')
diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi
index ba732879aad..1afce7f4a10 100644
--- a/python/pyarrow/tensor.pxi
+++ b/python/pyarrow/tensor.pxi
@@ -316,6 +316,17 @@ shape: {0.shape}""".format(self)
     def from_dense_numpy(cls, obj, dim_names=None):
         """
         Convert numpy.ndarray to arrow::SparseCOOTensor
+
+        Parameters
+        ----------
+        obj : numpy.ndarray
+            The dense numpy array that should be converted.
+        dim_names : list[str], optional
+            Names of the dimensions.
+
+        Returns
+        -------
+        pyarrow.SparseCOOTensor
         """
         return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))

@@ -549,6 +560,18 @@ shape: {0.shape}""".format(self)
         return self.stp.size()

     def dim_name(self, i):
+        """
+        Returns the name of the i-th tensor dimension.
+
+        Parameters
+        ----------
+        i : int
+            The physical index of the tensor dimension.
+
+        Returns
+        -------
+        str
+        """
         return frombytes(self.stp.dim_name(i))

     @property
@@ -601,6 +624,10 @@ shape: {0.shape}""".format(self)
             The dense numpy array that should be converted.
         dim_names : list, optional
             The names of the dimensions.
+
+        Returns
+        -------
+        pyarrow.SparseCSRMatrix
         """
         return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))

@@ -781,6 +808,18 @@ shape: {0.shape}""".format(self)
         return self.stp.size()

     def dim_name(self, i):
+        """
+        Returns the name of the i-th tensor dimension.
+
+        Parameters
+        ----------
+        i : int
+            The physical index of the tensor dimension.
+
+        Returns
+        -------
+        str
+        """
         return frombytes(self.stp.dim_name(i))

     @property
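A short sketch of `from_dense_numpy()` and `dim_name()` from the tensor classes above, assuming numpy is installed (data is illustrative):

```python
import numpy as np
import pyarrow as pa

dense = np.array([[0, 1], [2, 0]])
sparse = pa.SparseCOOTensor.from_dense_numpy(dense, dim_names=["row", "col"])

print(sparse.dim_name(0))  # 'row'
print(sparse.shape)        # (2, 2)
```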
@@ -816,6 +855,17 @@ shape: {0.shape}""".format(self)
     def from_dense_numpy(cls, obj, dim_names=None):
         """
         Convert numpy.ndarray to arrow::SparseCSCMatrix
+
+        Parameters
+        ----------
+        obj : numpy.ndarray
+            The dense numpy array that should be converted.
+        dim_names : list[str], optional
+            Names of the dimensions.
+
+        Returns
+        -------
+        pyarrow.SparseCSCMatrix
         """
         return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))

@@ -997,6 +1047,18 @@ shape: {0.shape}""".format(self)
         return self.stp.size()

     def dim_name(self, i):
+        """
+        Returns the name of the i-th tensor dimension.
+
+        Parameters
+        ----------
+        i : int
+            The physical index of the tensor dimension.
+
+        Returns
+        -------
+        str
+        """
         return frombytes(self.stp.dim_name(i))

     @property
@@ -1040,6 +1102,17 @@ shape: {0.shape}""".format(self)
     def from_dense_numpy(cls, obj, dim_names=None):
         """
         Convert numpy.ndarray to arrow::SparseCSFTensor
+
+        Parameters
+        ----------
+        obj : numpy.ndarray
+            The dense numpy array that should be converted.
+        dim_names : list[str], optional
+            Names of the dimensions.
+
+        Returns
+        -------
+        pyarrow.SparseCSFTensor
         """
         return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))

@@ -1190,6 +1263,18 @@ shape: {0.shape}""".format(self)
         return self.stp.size()

     def dim_name(self, i):
+        """
+        Returns the name of the i-th tensor dimension.
+
+        Parameters
+        ----------
+        i : int
+            The physical index of the tensor dimension.
+
+        Returns
+        -------
+        str
+        """
         return frombytes(self.stp.dim_name(i))

     @property
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 12ad2fc4b6f..f2dd59a0f1a 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -199,6 +199,15 @@ cdef class DataType(_Weakrefable):
         self.pep3118_format = _datatype_to_pep3118(self.type)

     cpdef Field field(self, i):
+        """
+        Parameters
+        ----------
+        i : int
+
+        Returns
+        -------
+        pyarrow.Field
+        """
         if not isinstance(i, int):
             raise TypeError(f"Expected int index, got type '{type(i)}'")
         cdef int index = _normalize_index(i, self.type.num_fields())
@@ -1886,6 +1895,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
         return self.wrapped

     def equals(self, KeyValueMetadata other):
+        """
+        Parameters
+        ----------
+        other : pyarrow.KeyValueMetadata
+
+        Returns
+        -------
+        bool
+        """
         return self.metadata.Equals(deref(other.wrapped))

     def __repr__(self):
@@ -1925,9 +1943,27 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
         return KeyValueMetadata, (list(self.items()),)

     def key(self, i):
+        """
+        Parameters
+        ----------
+        i : int
+
+        Returns
+        -------
+        bytes
+        """
         return self.metadata.key(i)

     def value(self, i):
+        """
+        Parameters
+        ----------
+        i : int
+
+        Returns
+        -------
+        bytes
+        """
         return self.metadata.value(i)

     def keys(self):
@@ -1943,6 +1979,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
             yield (self.metadata.key(i), self.metadata.value(i))

     def get_all(self, key):
+        """
+        Parameters
+        ----------
+        key : str
+
+        Returns
+        -------
+        list[bytes]
+        """
         key = tobytes(key)
         return [v for k, v in self.items() if k == key]
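For the KeyValueMetadata accessors above, note that keys and values come back as bytes, and that duplicate keys are allowed, which is what `get_all()` is for. A quick sketch (values are illustrative):

```python
import pyarrow as pa

meta = pa.KeyValueMetadata([("origin", "a"), ("origin", "b"), ("note", "c")])

print(meta.key(0))             # b'origin'
print(meta.value(2))           # b'c'
print(meta.get_all("origin"))  # [b'a', b'b']
```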