Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c++/include/orc/OrcFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ namespace orc {
ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path);

/**
* Create a reader to the for the ORC file.
* Create a reader to read the ORC file.
* @param stream the stream to read
* @param options the options for reading the file
*/
Expand Down
27 changes: 14 additions & 13 deletions c++/include/orc/Reader.hh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace orc {
ReaderOptions& setErrorStream(std::ostream& stream);

/**
* Open the file used a serialized copy of the file tail.
* Set a serialized copy of the file tail to be used when opening the file.
*
* When one process opens the file and other processes need to read
* the rows, we want to enable clients to just read the tail once.
Expand Down Expand Up @@ -236,7 +236,7 @@ namespace orc {

/**
* The interface for reading ORC file meta-data and constructing RowReaders.
* This is an an abstract class that will subclassed as necessary.
* This is an abstract class that will be subclassed as necessary.
*/
class Reader {
public:
Expand All @@ -257,7 +257,7 @@ namespace orc {

/**
* Get the user metadata keys.
* @return the set of metadata keys
* @return the set of user metadata keys
*/
virtual std::list<std::string> getMetadataKeys() const = 0;

Expand Down Expand Up @@ -306,7 +306,7 @@ namespace orc {
virtual WriterVersion getWriterVersion() const = 0;

/**
* Get the number of rows per a entry in the row index.
* Get the number of rows per entry in the row index.
* @return the number of rows per entry in the row index or 0 if there
* is no row index.
*/
Expand All @@ -320,7 +320,7 @@ namespace orc {

/**
* Get the information about a stripe.
* @param stripeIndex the stripe 0 to N-1 to get information about
* @param stripeIndex the index of the stripe (0 to N-1) to get information about
* @return the information about that stripe
*/
virtual ORC_UNIQUE_PTR<StripeInformation>
Expand All @@ -334,7 +334,7 @@ namespace orc {

/**
* Get the statistics about a stripe.
* @param stripeIndex the stripe 0 to N-1 to get statistics about
* @param stripeIndex the index of the stripe (0 to N-1) to get statistics about
* @return the statistics about that stripe
*/
virtual ORC_UNIQUE_PTR<StripeStatistics>
Expand All @@ -347,19 +347,19 @@ namespace orc {
virtual uint64_t getContentLength() const = 0;

/**
* Get the length of the file stripe statistics
* Get the length of the file stripe statistics.
* @return the number of compressed bytes in the file stripe statistics
*/
virtual uint64_t getStripeStatisticsLength() const = 0;

/**
* Get the length of the file footer
* Get the length of the file footer.
* @return the number of compressed bytes in the file footer
*/
virtual uint64_t getFileFooterLength() const = 0;

/**
* Get the length of the file postscript
* Get the length of the file postscript.
* @return the number of bytes in the file postscript
*/
virtual uint64_t getFilePostscriptLength() const = 0;
Expand All @@ -378,13 +378,14 @@ namespace orc {

/**
* Get the statistics about a single column in the file.
* @param columnId id of the column
* @return the information about the column
*/
virtual ORC_UNIQUE_PTR<ColumnStatistics>
getColumnStatistics(uint32_t columnId) const = 0;

/**
* check file has correct column statistics
* Check if the file has correct column statistics.
*/
virtual bool hasCorrectStatistics() const = 0;

Expand Down Expand Up @@ -443,17 +444,17 @@ namespace orc {
virtual uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) = 0;

/**
* @param names Column Names
* @param stripeIx index of the stripe to be read (if not specified,
* all stripes are considered).
* @param names Column Names
* @return upper bound on memory use by selected columns
*/
virtual uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) = 0;

/**
* @param include Column Type Ids
* @param stripeIx index of the stripe to be read (if not specified,
* all stripes are considered).
* @param include Column Type Ids
* @return upper bound on memory use by selected columns
*/
virtual uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) = 0;
Expand All @@ -462,7 +463,7 @@ namespace orc {

/**
* The interface for reading rows in ORC files.
* This is an an abstract class that will subclassed as necessary.
* This is an abstract class that will be subclassed as necessary.
*/
class RowReader {
public:
Expand Down
64 changes: 34 additions & 30 deletions c++/include/orc/Statistics.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ namespace orc {
virtual uint64_t getNumberOfValues() const = 0;

/**
* Check whether column has null value
* Check whether column has null value.
* @return true if has null value
*/
virtual bool hasNull() const = 0;

/**
* print out statistics of column if any
* Print out statistics of column if any.
*/
virtual std::string toString() const = 0;
};
Expand All @@ -59,7 +59,7 @@ namespace orc {
virtual ~BinaryColumnStatistics();

/**
* check whether column has total length
* Check whether column has total length.
* @return true if has total length
*/
virtual bool hasTotalLength() const = 0;
Expand All @@ -75,7 +75,7 @@ namespace orc {
virtual ~BooleanColumnStatistics();

/**
* check whether column has true/false count
* Check whether column has true/false count.
* @return true if has true/false count
*/
virtual bool hasCount() const = 0;
Expand All @@ -92,13 +92,13 @@ namespace orc {
virtual ~DateColumnStatistics();

/**
* check whether column has minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column has maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
Expand All @@ -124,19 +124,19 @@ namespace orc {
virtual ~DecimalColumnStatistics();

/**
* check whether column has minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column has maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;

/**
* check whether column has sum
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
Expand Down Expand Up @@ -168,19 +168,19 @@ namespace orc {
virtual ~DoubleColumnStatistics();

/**
* check whether column has minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column has maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;

/**
* check whether column has sum
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
Expand Down Expand Up @@ -215,19 +215,19 @@ namespace orc {
virtual ~IntegerColumnStatistics();

/**
* check whether column has minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column has maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;

/**
* check whether column has sum
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
Expand Down Expand Up @@ -261,20 +261,20 @@ namespace orc {
virtual ~StringColumnStatistics();

/**
* check whether column has minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column has maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;

/**
* check whether column
* @return true if has maximum
* Check whether column has total length.
* @return true if has total length
*/
virtual bool hasTotalLength() const = 0;

Expand Down Expand Up @@ -305,13 +305,13 @@ namespace orc {
virtual ~TimestampColumnStatistics();

/**
* check whether column minimum
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;

/**
* check whether column maximum
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
Expand All @@ -329,13 +329,13 @@ namespace orc {
virtual int64_t getMaximum() const = 0;

/**
* check whether column has a lowerBound
* Check whether column has a lowerBound.
* @return true if column has a lowerBound
*/
virtual bool hasLowerBound() const = 0;

/**
* check whether column has an upperBound
* Check whether column has an upperBound.
* @return true if column has an upperBound
*/
virtual bool hasUpperBound() const = 0;
Expand All @@ -360,14 +360,15 @@ namespace orc {
virtual ~Statistics();

/**
* Get the statistics of colId column.
* Get the statistics of the given column.
* @param colId id of the column
* @return one column's statistics
*/
virtual const ColumnStatistics* getColumnStatistics(uint32_t colId
) const = 0;

/**
* Get the number of columns
* Get the number of columns.
* @return the number of columns
*/
virtual uint32_t getNumberOfColumns() const = 0;
Expand All @@ -378,16 +379,19 @@ namespace orc {
virtual ~StripeStatistics();

/**
* Get the RowIndex statistics of a column id.
* @return one stripe RowIndex statistics
* Get the statistics of a given RowIndex entry in a given column.
* @param columnId id of the column
* @param rowIndexId RowIndex entry id
* @return statistics of the given RowIndex entry
*/
virtual const ColumnStatistics*
getRowIndexStatistics(
uint32_t columnId, uint32_t IndexId) const = 0;
uint32_t columnId, uint32_t rowIndexId) const = 0;

/**
* Get the number of RowIndexes
* @return the number of RowIndex Statistics
* Get the number of RowIndex statistics in a given column.
* @param columnId id of the column
* @return the number of RowIndex statistics
*/
virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0;
};
Expand Down
6 changes: 3 additions & 3 deletions c++/include/orc/Vector.hh
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ namespace orc {

/**
* The offset of the first element of each list.
* The length of list i is startOffset[i+1] - startOffset[i].
* The length of list i is offsets[i+1] - offsets[i].
*/
DataBuffer<int64_t> offsets;

Expand All @@ -151,8 +151,8 @@ namespace orc {
bool hasVariableLength();

/**
* The offset of the first element of each list.
* The length of list i is startOffset[i+1] - startOffset[i].
* The offset of the first element of each map.
* The size of map i is offsets[i+1] - offsets[i].
*/
DataBuffer<int64_t> offsets;

Expand Down
Loading