Skip to content
This repository has been archived by the owner on Feb 17, 2023. It is now read-only.

Commit

Permalink
Add docstrings to file_rados_parquet.cc/h
Browse files Browse the repository at this point in the history
  • Loading branch information
YashJipkate authored and JayjeetAtGithub committed Jul 24, 2021
1 parent d96a596 commit 79e3073
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/dataset/file_rados_parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace flatbuf = org::apache::arrow::flatbuf;

namespace dataset {

/// \brief A ScanTask backed by an RadosParquet file.
class RadosParquetScanTask : public ScanTask {
public:
RadosParquetScanTask(std::shared_ptr<ScanOptions> options,
Expand Down
35 changes: 35 additions & 0 deletions cpp/src/arrow/dataset/file_rados_parquet.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
namespace arrow {
namespace dataset {

/// \addtogroup dataset-file-formats
///
/// @{

/// \brief An interface to connect to a RADOS cluster and hold the connection
/// information for usage in later stages.
class ARROW_DS_EXPORT RadosCluster {
public:
struct RadosConnectionCtx {
Expand All @@ -68,6 +74,8 @@ class ARROW_DS_EXPORT RadosCluster {

~RadosCluster() { Shutdown(); }

/// \brief Connect to the Rados cluster.
/// \return Status.
Status Connect() {
if (rados->init2(ctx.user_name.c_str(), ctx.cluster_name.c_str(), 0))
return Status::Invalid("librados::init2 returned non-zero exit code.");
Expand All @@ -84,6 +92,8 @@ class ARROW_DS_EXPORT RadosCluster {
return Status::OK();
}

/// \brief Shutdown the connection to the Rados cluster.
/// \return Status.
Status Shutdown() {
rados->shutdown();
return Status::OK();
Expand All @@ -94,11 +104,18 @@ class ARROW_DS_EXPORT RadosCluster {
IoCtxInterface* ioCtx;
};

/// \brief Interface for translating the name of a file in CephFS to its
/// corresponding object ID in RADOS assuming 1:1 mapping between a file
/// and its underlying object.
class ARROW_DS_EXPORT DirectObjectAccess {
public:
explicit DirectObjectAccess(const std::shared_ptr<RadosCluster>& cluster)
: cluster_(std::move(cluster)) {}

/// \brief Executes the POSIX stat call on a file.
/// \param[in] path Path of the file.
/// \param[out] st Refernce to the struct object to store the result.
/// \return Status.
Status Stat(const std::string& path, struct stat& st) {
struct stat file_st;
if (stat(path.c_str(), &file_st) < 0)
Expand All @@ -114,6 +131,13 @@ class ARROW_DS_EXPORT DirectObjectAccess {
return oid;
}

/// \brief Executes query on the librados node. It uses the librados::exec API to
/// perform queries on the storage node and stores the result in the output buffer.
/// \param[in] inode inode of the file.
/// \param[in] fn The function to be executed by the librados::exec call.
/// \param[in] in The input buffer.
/// \param[out] in The output buffer.
/// \return Status.
Status Exec(uint64_t inode, const std::string& fn, ceph::bufferlist& in,
ceph::bufferlist& out) {
std::string oid = ConvertFileInodeToObjectID(inode);
Expand All @@ -128,6 +152,8 @@ class ARROW_DS_EXPORT DirectObjectAccess {
std::shared_ptr<RadosCluster> cluster_;
};

/// \brief A ParquetFileFormat implementation that offloads the fragment
/// scan operations to the Ceph OSDs
class ARROW_DS_EXPORT RadosParquetFileFormat : public ParquetFileFormat {
public:
explicit RadosParquetFileFormat(const std::string&, const std::string&,
Expand All @@ -146,8 +172,15 @@ class ARROW_DS_EXPORT RadosParquetFileFormat : public ParquetFileFormat {

Result<bool> IsSupported(const FileSource& source) const override { return true; }

/// \brief Return the schema of the file fragment.
/// \param[in] source The source of the file fragment.
/// \return The schema of the file fragment.
Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;

/// \brief Scan a file fragment.
/// \param[in] options Options to pass.
/// \param[in] file The file fragment.
/// \return The scanned file fragment.
Result<ScanTaskIterator> ScanFile(
const std::shared_ptr<ScanOptions>& options,
const std::shared_ptr<FileFragment>& file) const override;
Expand Down Expand Up @@ -178,5 +211,7 @@ ARROW_DS_EXPORT Status SerializeTable(std::shared_ptr<Table>& table,

ARROW_DS_EXPORT Status DeserializeTable(RecordBatchVector& batches, ceph::bufferlist& bl);

/// @}

} // namespace dataset
} // namespace arrow

0 comments on commit 79e3073

Please sign in to comment.