diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh index bd866c28b8..c64853168a 100644 --- a/c++/include/orc/OrcFile.hh +++ b/c++/include/orc/OrcFile.hh @@ -121,7 +121,7 @@ namespace orc { ORC_UNIQUE_PTR readHdfsFile(const std::string& path); /** - * Create a reader to the for the ORC file. + * Create a reader to read the ORC file. * @param stream the stream to read * @param options the options for reading the file */ diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh index b5e2e7ee32..5818b46a86 100644 --- a/c++/include/orc/Reader.hh +++ b/c++/include/orc/Reader.hh @@ -55,7 +55,7 @@ namespace orc { ReaderOptions& setErrorStream(std::ostream& stream); /** - * Open the file used a serialized copy of the file tail. + * Set a serialized copy of the file tail to be used when opening the file. * * When one process opens the file and other processes need to read * the rows, we want to enable clients to just read the tail once. @@ -236,7 +236,7 @@ namespace orc { /** * The interface for reading ORC file meta-data and constructing RowReaders. - * This is an an abstract class that will subclassed as necessary. + * This is an abstract class that will be subclassed as necessary. */ class Reader { public: @@ -257,7 +257,7 @@ namespace orc { /** * Get the user metadata keys. - * @return the set of metadata keys + * @return the set of user metadata keys */ virtual std::list getMetadataKeys() const = 0; @@ -306,7 +306,7 @@ namespace orc { virtual WriterVersion getWriterVersion() const = 0; /** - * Get the number of rows per a entry in the row index. + * Get the number of rows per an entry in the row index. * @return the number of rows per an entry in the row index or 0 if there * is no row index. */ @@ -320,7 +320,7 @@ namespace orc { /** * Get the information about a stripe. 
- * @param stripeIndex the stripe 0 to N-1 to get information about + * @param stripeIndex the index of the stripe (0 to N-1) to get information about * @return the information about that stripe */ virtual ORC_UNIQUE_PTR @@ -334,7 +334,7 @@ namespace orc { /** * Get the statistics about a stripe. - * @param stripeIndex the stripe 0 to N-1 to get statistics about + * @param stripeIndex the index of the stripe (0 to N-1) to get statistics about * @return the statistics about that stripe */ virtual ORC_UNIQUE_PTR @@ -347,19 +347,19 @@ namespace orc { virtual uint64_t getContentLength() const = 0; /** - * Get the length of the file stripe statistics + * Get the length of the file stripe statistics. * @return the number of compressed bytes in the file stripe statistics */ virtual uint64_t getStripeStatisticsLength() const = 0; /** - * Get the length of the file footer + * Get the length of the file footer. * @return the number of compressed bytes in the file footer */ virtual uint64_t getFileFooterLength() const = 0; /** - * Get the length of the file postscript + * Get the length of the file postscript. * @return the number of bytes in the file postscript */ virtual uint64_t getFilePostscriptLength() const = 0; @@ -378,13 +378,14 @@ namespace orc { /** * Get the statistics about a single column in the file. + * @param columnId id of the column * @return the information about the column */ virtual ORC_UNIQUE_PTR getColumnStatistics(uint32_t columnId) const = 0; /** - * check file has correct column statistics + * Check if the file has correct column statistics. */ virtual bool hasCorrectStatistics() const = 0; @@ -443,17 +444,17 @@ namespace orc { virtual uint64_t getMemoryUseByFieldId(const std::list& include, int stripeIx=-1) = 0; /** + * @param names Column Names * @param stripeIx index of the stripe to be read (if not specified, * all stripes are considered). 
- * @param names Column Names * @return upper bound on memory use by selected columns */ virtual uint64_t getMemoryUseByName(const std::list& names, int stripeIx=-1) = 0; /** + * @param include Column Type Ids * @param stripeIx index of the stripe to be read (if not specified, * all stripes are considered). - * @param include Column Type Ids * @return upper bound on memory use by selected columns */ virtual uint64_t getMemoryUseByTypeId(const std::list& include, int stripeIx=-1) = 0; @@ -462,7 +463,7 @@ namespace orc { /** * The interface for reading rows in ORC files. - * This is an an abstract class that will subclassed as necessary. + * This is an abstract class that will be subclassed as necessary. */ class RowReader { public: diff --git a/c++/include/orc/Statistics.hh b/c++/include/orc/Statistics.hh index 7a7d1a87cf..5b894789d8 100644 --- a/c++/include/orc/Statistics.hh +++ b/c++/include/orc/Statistics.hh @@ -40,13 +40,13 @@ namespace orc { virtual uint64_t getNumberOfValues() const = 0; /** - * Check whether column has null value + * Check whether column has null value. * @return true if has null value */ virtual bool hasNull() const = 0; /** - * print out statistics of column if any + * Print out statistics of column if any. */ virtual std::string toString() const = 0; }; @@ -59,7 +59,7 @@ namespace orc { virtual ~BinaryColumnStatistics(); /** - * check whether column has total length + * Check whether column has total length. * @return true if has total length */ virtual bool hasTotalLength() const = 0; @@ -75,7 +75,7 @@ namespace orc { virtual ~BooleanColumnStatistics(); /** - * check whether column has true/false count + * Check whether column has true/false count. * @return true if has true/false count */ virtual bool hasCount() const = 0; @@ -92,13 +92,13 @@ namespace orc { virtual ~DateColumnStatistics(); /** - * check whether column has minimum + * Check whether column has minimum. 
* @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column has maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; @@ -124,19 +124,19 @@ namespace orc { virtual ~DecimalColumnStatistics(); /** - * check whether column has minimum + * Check whether column has minimum. * @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column has maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; /** - * check whether column has sum + * Check whether column has sum. * @return true if has sum */ virtual bool hasSum() const = 0; @@ -168,19 +168,19 @@ namespace orc { virtual ~DoubleColumnStatistics(); /** - * check whether column has minimum + * Check whether column has minimum. * @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column has maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; /** - * check whether column has sum + * Check whether column has sum. * @return true if has sum */ virtual bool hasSum() const = 0; @@ -215,19 +215,19 @@ namespace orc { virtual ~IntegerColumnStatistics(); /** - * check whether column has minimum + * Check whether column has minimum. * @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column has maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; /** - * check whether column has sum + * Check whether column has sum. * @return true if has sum */ virtual bool hasSum() const = 0; @@ -261,20 +261,20 @@ namespace orc { virtual ~StringColumnStatistics(); /** - * check whether column has minimum + * Check whether column has minimum. 
* @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column has maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; /** - * check whether column - * @return true if has maximum + * Check whether column has total length. + * @return true if has total length */ virtual bool hasTotalLength() const = 0; @@ -305,13 +305,13 @@ namespace orc { virtual ~TimestampColumnStatistics(); /** - * check whether column minimum + * Check whether column has minimum. * @return true if has minimum */ virtual bool hasMinimum() const = 0; /** - * check whether column maximum + * Check whether column has maximum. * @return true if has maximum */ virtual bool hasMaximum() const = 0; @@ -329,13 +329,13 @@ namespace orc { virtual int64_t getMaximum() const = 0; /** - * check whether column has a lowerBound + * Check whether column has a lowerBound. * @return true if column has a lowerBound */ virtual bool hasLowerBound() const = 0; /** - * check whether column has an upperBound + * Check whether column has an upperBound. * @return true if column has an upperBound */ virtual bool hasUpperBound() const = 0; @@ -360,14 +360,15 @@ namespace orc { virtual ~Statistics(); /** - * Get the statistics of colId column. + * Get the statistics of the given column. + * @param colId id of the column * @return one column's statistics */ virtual const ColumnStatistics* getColumnStatistics(uint32_t colId ) const = 0; /** - * Get the number of columns + * Get the number of columns. * @return the number of columns */ virtual uint32_t getNumberOfColumns() const = 0; @@ -378,16 +379,19 @@ namespace orc { virtual ~StripeStatistics(); /** - * Get the RowIndex statistics of a column id. - * @return one stripe RowIndex statistics + * Get the statistics of a given RowIndex entry in a given column. 
+ * @param columnId id of the column + * @param rowIndexId RowIndex entry id + * @return statistics of the given RowIndex entry */ virtual const ColumnStatistics* getRowIndexStatistics( - uint32_t columnId, uint32_t IndexId) const = 0; + uint32_t columnId, uint32_t rowIndexId) const = 0; /** - * Get the number of RowIndexes - * @return the number of RowIndex Statistics + * Get the number of RowIndex statistics in a given column. + * @param columnId id of the column + * @return the number of RowIndex statistics */ virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0; }; diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh index 86a9c54a36..c9779987b1 100644 --- a/c++/include/orc/Vector.hh +++ b/c++/include/orc/Vector.hh @@ -134,7 +134,7 @@ namespace orc { /** * The offset of the first element of each list. - * The length of list i is startOffset[i+1] - startOffset[i]. + * The length of list i is offsets[i+1] - offsets[i]. */ DataBuffer offsets; @@ -151,8 +151,8 @@ namespace orc { bool hasVariableLength(); /** - * The offset of the first element of each list. - * The length of list i is startOffset[i+1] - startOffset[i]. + * The offset of the first element of each map. + * The size of map i is offsets[i+1] - offsets[i]. */ DataBuffer offsets; diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh index 1284c65d87..7efb12bae9 100644 --- a/c++/include/orc/Writer.hh +++ b/c++/include/orc/Writer.hh @@ -82,12 +82,12 @@ namespace orc { uint64_t getCompressionBlockSize() const; /** - * Set row index stride. Use value 0 to disable row index. + * Set row index stride (the number of rows per an entry in the row index). Use value 0 to disable row index. */ WriterOptions& setRowIndexStride(uint64_t stride); /** - * Get the index stride size. + * Get the row index stride (the number of rows per an entry in the row index). * @return if not set, return default value. 
*/ uint64_t getRowIndexStride() const; @@ -159,7 +159,7 @@ namespace orc { WriterOptions& setMemoryPool(MemoryPool * memoryPool); /** - * Get the strip size. + * Get the memory pool. * @return if not set, return default memory pool. */ MemoryPool * getMemoryPool() const; @@ -193,8 +193,8 @@ namespace orc { /** * Create a row batch for writing the columns into this file. - * @param size the number of rows to read - * @return a new ColumnVectorBatch to write into + * @param size the number of rows to write. + * @return a new ColumnVectorBatch to write into. */ virtual ORC_UNIQUE_PTR createRowBatch(uint64_t size ) const = 0; @@ -206,7 +206,7 @@ namespace orc { virtual void add(ColumnVectorBatch& rowsToAdd) = 0; /** - * Close the write and flush any pending data to the output stream. + * Close the writer and flush any pending data to the output stream. */ virtual void close() = 0;