From ea32aec6a361bdd781d45bb429340c5897db515d Mon Sep 17 00:00:00 2001 From: dgibson Date: Wed, 14 Jan 2026 21:48:44 +0000 Subject: [PATCH 01/12] Update CMakeLists --- components/core/cmake/Options/options.cmake | 3 +++ components/core/src/clp_s/CMakeLists.txt | 6 ++++-- components/core/src/clp_s/indexer/CMakeLists.txt | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/components/core/cmake/Options/options.cmake b/components/core/cmake/Options/options.cmake index 1030fa0a61..a08403f692 100644 --- a/components/core/cmake/Options/options.cmake +++ b/components/core/cmake/Options/options.cmake @@ -132,6 +132,7 @@ function(validate_clp_binaries_dependencies) CLP_BUILD_CLP_S_SEARCH CLP_BUILD_CLP_S_SEARCH_AST CLP_BUILD_CLP_S_SEARCH_KQL + CLP_BUILD_CLP_S_TIMESTAMP_PARSER ) endfunction() @@ -207,6 +208,7 @@ function(validate_clp_s_archivereader_dependencies) CLP_BUILD_CLP_STRING_UTILS CLP_BUILD_CLP_S_CLP_DEPENDENCIES CLP_BUILD_CLP_S_IO + CLP_BUILD_CLP_S_TIMESTAMP_PARSER CLP_BUILD_CLP_S_TIMESTAMPPATTERN ) endfunction() @@ -228,6 +230,7 @@ function(validate_clp_s_archivewriter_dependencies) validate_clp_dependencies_for_target(CLP_BUILD_CLP_S_ARCHIVEWRITER CLP_BUILD_CLP_S_CLP_DEPENDENCIES CLP_BUILD_CLP_S_IO + CLP_BUILD_CLP_S_TIMESTAMP_PARSER CLP_BUILD_CLP_S_TIMESTAMPPATTERN ) endfunction() diff --git a/components/core/src/clp_s/CMakeLists.txt b/components/core/src/clp_s/CMakeLists.txt index d38c28ade5..72646ad317 100644 --- a/components/core/src/clp_s/CMakeLists.txt +++ b/components/core/src/clp_s/CMakeLists.txt @@ -279,13 +279,14 @@ if(CLP_BUILD_CLP_S_ARCHIVEWRITER) absl::flat_hash_map clp_s::clp_dependencies clp_s::io + clp_s::timestamp_parser + clp_s::timestamp_pattern msgpack-cxx nlohmann_json::nlohmann_json simdjson::simdjson ystdlib::error_handling PRIVATE Boost::url - clp_s::timestamp_pattern ${CURL_LIBRARIES} fmt::fmt spdlog::spdlog @@ -343,13 +344,14 @@ if(CLP_BUILD_CLP_S_ARCHIVEREADER) absl::flat_hash_map clp::string_utils clp_s::io + clp_s::timestamp_parser + clp_s::timestamp_pattern msgpack-cxx nlohmann_json::nlohmann_json ystdlib::error_handling PRIVATE Boost::url clp_s::clp_dependencies - clp_s::timestamp_pattern ${CURL_LIBRARIES} fmt::fmt spdlog::spdlog diff --git a/components/core/src/clp_s/indexer/CMakeLists.txt b/components/core/src/clp_s/indexer/CMakeLists.txt index ba21f50a26..4f2334d862 100644 --- a/components/core/src/clp_s/indexer/CMakeLists.txt +++ b/components/core/src/clp_s/indexer/CMakeLists.txt @@ -132,6 +132,7 @@ if(CLP_BUILD_EXECUTABLES) Boost::iostreams Boost::program_options Boost::url ${CURL_LIBRARIES} clp::string_utils + clp_s::timestamp_parser date::date MariaDBClient::MariaDBClient msgpack-cxx From fc541269636dac7017d3e6a0fefcbfb14ec9b425 Mon Sep 17 00:00:00 2001 From: dgibson Date: Wed, 14 Jan 2026 21:56:32 +0000 Subject: [PATCH 02/12] Add TimestampColumnReader class. --- components/core/src/clp_s/ColumnReader.cpp | 44 ++++++++++++++++++---- components/core/src/clp_s/ColumnReader.hpp | 39 ++++++++++++++++++- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp index 2d2a2d7ee6..68c96b3de2 100644 --- a/components/core/src/clp_s/ColumnReader.cpp +++ b/components/core/src/clp_s/ColumnReader.cpp @@ -30,7 +30,7 @@ void DeltaEncodedInt64ColumnReader::load(BufferViewReader& reader, uint64_t num_ } } -int64_t DeltaEncodedInt64ColumnReader::get_value_at_idx(size_t idx) { +auto DeltaEncodedInt64ColumnReader::get_value_at_idx(size_t idx) const -> int64_t { if (m_cur_idx == idx) { return m_cur_value; } @@ -226,31 +226,61 @@ int64_t VariableStringColumnReader::get_variable_id(uint64_t cur_message) { return m_variables[cur_message]; } -void DateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) { +void DeprecatedDateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) { m_timestamps = reader.read_unaligned_span(num_messages); m_timestamp_encodings = reader.read_unaligned_span(num_messages); } -std::variant DateStringColumnReader::extract_value( +std::variant DeprecatedDateStringColumnReader::extract_value( uint64_t cur_message ) { - return m_timestamp_dict->get_string_encoding( + return m_timestamp_dict->get_deprecated_timestamp_string_encoding( m_timestamps[cur_message], m_timestamp_encodings[cur_message] ); } -void DateStringColumnReader::extract_string_value_into_buffer( +void DeprecatedDateStringColumnReader::extract_string_value_into_buffer( uint64_t cur_message, std::string& buffer ) { - buffer.append(m_timestamp_dict->get_string_encoding( + buffer.append(m_timestamp_dict->get_deprecated_timestamp_string_encoding( m_timestamps[cur_message], m_timestamp_encodings[cur_message] )); } -epochtime_t DateStringColumnReader::get_encoded_time(uint64_t cur_message) { +epochtime_t DeprecatedDateStringColumnReader::get_encoded_time(uint64_t cur_message) { return m_timestamps[cur_message]; } + +void TimestampColumnReader::load(BufferViewReader& reader, uint64_t num_messages) { + m_timestamps.load(reader, num_messages); + m_timestamp_encodings = reader.read_unaligned_span(num_messages); +} + +std::variant TimestampColumnReader::extract_value( + uint64_t cur_message +) { + std::string ret; + m_timestamp_dict->append_timestamp_to_buffer( + m_timestamps.get_value_at_idx(cur_message), + m_timestamp_encodings[cur_message], + ret + ); + return ret; +} + +void +TimestampColumnReader::extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) { + m_timestamp_dict->append_timestamp_to_buffer( + m_timestamps.get_value_at_idx(cur_message), + m_timestamp_encodings[cur_message], + buffer + ); +} + +auto TimestampColumnReader::get_encoded_time(uint64_t cur_message) const -> epochtime_t { + return m_timestamps.get_value_at_idx(cur_message); +} } // namespace clp_s diff --git a/components/core/src/clp_s/ColumnReader.hpp b/components/core/src/clp_s/ColumnReader.hpp index 6170a3f161..228d021fb4 100644 --- a/components/core/src/clp_s/ColumnReader.hpp +++ b/components/core/src/clp_s/ColumnReader.hpp @@ -113,15 +113,15 @@ class DeltaEncodedInt64ColumnReader : public BaseColumnReader { void extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) override; -private: /** * Gets the value stored at a given index by summing up the stored deltas between the requested * index and the last requested index. * @param idx * @return The value stored at the requested index. */ - int64_t get_value_at_idx(size_t idx); + [[nodiscard]] auto get_value_at_idx(size_t idx) const -> int64_t; +private: UnalignedMemSpan m_values; int64_t m_cur_value{}; size_t m_cur_idx{}; @@ -360,6 +360,41 @@ class DateStringColumnReader : public BaseColumnReader { UnalignedMemSpan m_timestamps; UnalignedMemSpan m_timestamp_encodings; }; + +class TimestampColumnReader : public BaseColumnReader { +public: + // Constructor + TimestampColumnReader(int32_t id, std::shared_ptr timestamp_dict) + : BaseColumnReader(id), + m_timestamp_dict(std::move(timestamp_dict)), + m_timestamps{id} {} + + // Destructor + ~TimestampColumnReader() override = default; + + // Methods inherited from BaseColumnReader + void load(BufferViewReader& reader, uint64_t num_messages) override; + + NodeType get_type() override { return NodeType::Timestamp; } + + std::variant extract_value( + uint64_t cur_message + ) override; + + void extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) override; + + /** + * @param cur_message + * @return The encoded time in epoch nanoseconds. + */ + [[nodiscard]] auto get_encoded_time(uint64_t cur_message) const -> epochtime_t; + +private: + std::shared_ptr m_timestamp_dict; + + DeltaEncodedInt64ColumnReader m_timestamps; + UnalignedMemSpan m_timestamp_encodings; +}; } // namespace clp_s #endif // CLP_S_COLUMNREADER_HPP From 2a89278fb0c66c51625f90ea9da6f79a456d2cda Mon Sep 17 00:00:00 2001 From: dgibson Date: Wed, 14 Jan 2026 22:16:33 +0000 Subject: [PATCH 03/12] Add TimestampColumnWriter class. --- components/core/src/clp_s/ColumnWriter.cpp | 28 ++++++++++++++++----- components/core/src/clp_s/ColumnWriter.hpp | 21 ++++++++++++++++ components/core/src/clp_s/ParsedMessage.hpp | 1 + 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp_s/ColumnWriter.cpp b/components/core/src/clp_s/ColumnWriter.cpp index 1f57f5c454..85e86f86f8 100644 --- a/components/core/src/clp_s/ColumnWriter.cpp +++ b/components/core/src/clp_s/ColumnWriter.cpp @@ -28,18 +28,21 @@ void Int64ColumnWriter::store(ZstdCompressor& compressor) { compressor.write(reinterpret_cast(m_values.data()), size); } -size_t DeltaEncodedInt64ColumnWriter::add_value(ParsedMessage::variable_t& value) { +auto DeltaEncodedInt64ColumnWriter::add_value(int64_t value) -> size_t { if (0 == m_values.size()) { - m_cur = std::get(value); - m_values.push_back(m_cur); + m_cur = value; + m_values.emplace_back(m_cur); } else { - auto next = std::get(value); - m_values.push_back(next - m_cur); - m_cur = next; + m_values.emplace_back(value - m_cur); + m_cur = value; } return sizeof(int64_t); } +size_t DeltaEncodedInt64ColumnWriter::add_value(ParsedMessage::variable_t& value) { + return add_value(std::get(value)); +} + void DeltaEncodedInt64ColumnWriter::store(ZstdCompressor& compressor) { size_t size = m_values.size() * sizeof(int64_t); compressor.write(reinterpret_cast(m_values.data()), size); @@ -181,4 +184,17 @@ void DateStringColumnWriter::store(ZstdCompressor& compressor) { size_t encodings_size = m_timestamp_encodings.size() * sizeof(int64_t); compressor.write(reinterpret_cast(m_timestamp_encodings.data()), encodings_size); } + +auto TimestampColumnWriter::add_value(ParsedMessage::variable_t& value) -> size_t { + auto const& encoded_timestamp = std::get>(value); + auto const encoded_timestamp_size{m_timestamps.add_value(encoded_timestamp.first)}; + m_timestamp_encodings.emplace_back(encoded_timestamp.second); + return encoded_timestamp_size + sizeof(uint64_t); +} + +void TimestampColumnWriter::store(ZstdCompressor& compressor) { + m_timestamps.store(compressor); + size_t const encodings_size{m_timestamp_encodings.size() * sizeof(uint64_t)}; + compressor.write(reinterpret_cast(m_timestamp_encodings.data()), encodings_size); +} } // namespace clp_s diff --git a/components/core/src/clp_s/ColumnWriter.hpp b/components/core/src/clp_s/ColumnWriter.hpp index 78b7665493..58beda3517 100644 --- a/components/core/src/clp_s/ColumnWriter.hpp +++ b/components/core/src/clp_s/ColumnWriter.hpp @@ -72,6 +72,9 @@ class DeltaEncodedInt64ColumnWriter : public BaseColumnWriter { // Destructor ~DeltaEncodedInt64ColumnWriter() override = default; + // Methods + [[nodiscard]] auto add_value(int64_t value) -> size_t; + // Methods inherited from BaseColumnWriter size_t add_value(ParsedMessage::variable_t& value) override; @@ -256,6 +259,24 @@ class DateStringColumnWriter : public BaseColumnWriter { std::vector m_timestamps; std::vector m_timestamp_encodings; }; + +class TimestampColumnWriter : public BaseColumnWriter { +public: + // Constructor + explicit TimestampColumnWriter(int32_t id) : BaseColumnWriter(id), m_timestamps{id} {} + + // Destructor + ~TimestampColumnWriter() override = default; + + // Methods inherited from BaseColumnWriter + auto add_value(ParsedMessage::variable_t& value) -> size_t override; + + void store(ZstdCompressor& compressor) override; + +private: + DeltaEncodedInt64ColumnWriter m_timestamps; + std::vector m_timestamp_encodings; +}; } // namespace clp_s #endif // CLP_S_COLUMNWRITER_HPP diff --git a/components/core/src/clp_s/ParsedMessage.hpp b/components/core/src/clp_s/ParsedMessage.hpp index 45ac3e0304..00feb02719 100644 --- a/components/core/src/clp_s/ParsedMessage.hpp +++ b/components/core/src/clp_s/ParsedMessage.hpp @@ -24,6 +24,7 @@ class ParsedMessage { clp::ffi::FourByteEncodedTextAst, bool, std::pair, + std::pair, std::pair>; // Constructor From 661b3810378fc4a5565ff137afc3bd5041b30b55 Mon Sep 17 00:00:00 2001 From: dgibson Date: Wed, 14 Jan 2026 22:18:17 +0000 Subject: [PATCH 04/12] Get rid of mistakenly added const --- components/core/src/clp_s/ColumnReader.cpp | 4 ++-- components/core/src/clp_s/ColumnReader.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp index 68c96b3de2..9fdb48d3de 100644 --- a/components/core/src/clp_s/ColumnReader.cpp +++ b/components/core/src/clp_s/ColumnReader.cpp @@ -30,7 +30,7 @@ void DeltaEncodedInt64ColumnReader::load(BufferViewReader& reader, uint64_t num_ } } -auto DeltaEncodedInt64ColumnReader::get_value_at_idx(size_t idx) const -> int64_t { +auto DeltaEncodedInt64ColumnReader::get_value_at_idx(size_t idx) -> int64_t { if (m_cur_idx == idx) { return m_cur_value; } @@ -280,7 +280,7 @@ TimestampColumnReader::extract_string_value_into_buffer(uint64_t cur_message, st ); } -auto TimestampColumnReader::get_encoded_time(uint64_t cur_message) const -> epochtime_t { +auto TimestampColumnReader::get_encoded_time(uint64_t cur_message) -> epochtime_t { return m_timestamps.get_value_at_idx(cur_message); } } // namespace clp_s diff --git a/components/core/src/clp_s/ColumnReader.hpp b/components/core/src/clp_s/ColumnReader.hpp index 228d021fb4..591b9bd8a6 100644 --- a/components/core/src/clp_s/ColumnReader.hpp +++ b/components/core/src/clp_s/ColumnReader.hpp @@ -119,7 +119,7 @@ class DeltaEncodedInt64ColumnReader : public BaseColumnReader { * @param idx * @return The value stored at the requested index. */ - [[nodiscard]] auto get_value_at_idx(size_t idx) const -> int64_t; + [[nodiscard]] auto get_value_at_idx(size_t idx) -> int64_t; private: UnalignedMemSpan m_values; @@ -387,7 +387,7 @@ class TimestampColumnReader : public BaseColumnReader { * @param cur_message * @return The encoded time in epoch nanoseconds. */ - [[nodiscard]] auto get_encoded_time(uint64_t cur_message) const -> epochtime_t; + [[nodiscard]] auto get_encoded_time(uint64_t cur_message) -> epochtime_t; private: std::shared_ptr m_timestamp_dict; From e02701a1fe27e98f0e28996189424b4e95f428fb Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:04:44 +0000 Subject: [PATCH 05/12] Undo some renamings that should be part of follow-up PR. --- components/core/src/clp_s/ColumnReader.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp index 9fdb48d3de..c041ab7354 100644 --- a/components/core/src/clp_s/ColumnReader.cpp +++ b/components/core/src/clp_s/ColumnReader.cpp @@ -226,31 +226,31 @@ int64_t VariableStringColumnReader::get_variable_id(uint64_t cur_message) { return m_variables[cur_message]; } -void DeprecatedDateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) { +void DateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) { m_timestamps = reader.read_unaligned_span(num_messages); m_timestamp_encodings = reader.read_unaligned_span(num_messages); } -std::variant DeprecatedDateStringColumnReader::extract_value( +std::variant DateStringColumnReader::extract_value( uint64_t cur_message ) { - return m_timestamp_dict->get_deprecated_timestamp_string_encoding( + return m_timestamp_dict->get_string_encoding( m_timestamps[cur_message], m_timestamp_encodings[cur_message] ); } -void DeprecatedDateStringColumnReader::extract_string_value_into_buffer( +void DateStringColumnReader::extract_string_value_into_buffer( uint64_t cur_message, std::string& buffer ) { - buffer.append(m_timestamp_dict->get_deprecated_timestamp_string_encoding( + buffer.append(m_timestamp_dict->get_string_encoding( m_timestamps[cur_message], m_timestamp_encodings[cur_message] )); } -epochtime_t DeprecatedDateStringColumnReader::get_encoded_time(uint64_t cur_message) { +epochtime_t DateStringColumnReader::get_encoded_time(uint64_t cur_message) { return m_timestamps[cur_message]; } From 7194e1ede4d9cce997c3984f58f202bb331e2a48 Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:07:39 +0000 Subject: [PATCH 06/12] Reserve Timestamp NodeType. --- components/core/src/clp_s/SchemaTree.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/components/core/src/clp_s/SchemaTree.hpp b/components/core/src/clp_s/SchemaTree.hpp index deafcdafef..0617ea28a2 100644 --- a/components/core/src/clp_s/SchemaTree.hpp +++ b/components/core/src/clp_s/SchemaTree.hpp @@ -44,6 +44,7 @@ enum class NodeType : uint8_t { DeltaInteger, FormattedFloat, DictionaryFloat, + Timestamp, Unknown = std::underlying_type::type(~0ULL) }; From 56a5ff6ded403ef9ee27099c42964d27c9b3ba62 Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:11:35 +0000 Subject: [PATCH 07/12] Add stub for append_timestamp_to_buffer that will be filled in in follow-up PR. --- .../core/src/clp_s/TimestampDictionaryReader.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/components/core/src/clp_s/TimestampDictionaryReader.hpp b/components/core/src/clp_s/TimestampDictionaryReader.hpp index 466b16ed13..a6bc303a42 100644 --- a/components/core/src/clp_s/TimestampDictionaryReader.hpp +++ b/components/core/src/clp_s/TimestampDictionaryReader.hpp @@ -36,6 +36,21 @@ class TimestampDictionaryReader { */ std::string get_string_encoding(epochtime_t epoch, uint64_t format_id) const; + /** + * Marshals and appends the `timestamp` to the `buffer` by interpreting the timestamp pattern + * referenced by `format_id` as a `clp_s::timestamp_parser::TimestampPattern`. + * @param timestamp + * @param format_id + * @param buffer + * @throws OperationFailed is the format indicated by `format_id` can not be interpreted as a + * `clp_s::timestamp_parser::TimestampPattern`. + */ + void append_timestamp_to_buffer( + epochtime_t timestamp, + uint64_t format_id, + std::string& buffer + ) const { /*NO-OP until follow-up PR*/} + /** * Gets iterators for the timestamp patterns * @return begin and end iterators for the timestamp patterns From 8810fd67cd1a928401750ab3cf16e0bc2c30cdce Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:18:38 +0000 Subject: [PATCH 08/12] Lint fix. --- components/core/src/clp_s/TimestampDictionaryReader.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp_s/TimestampDictionaryReader.hpp b/components/core/src/clp_s/TimestampDictionaryReader.hpp index a6bc303a42..ad8260a7e4 100644 --- a/components/core/src/clp_s/TimestampDictionaryReader.hpp +++ b/components/core/src/clp_s/TimestampDictionaryReader.hpp @@ -49,7 +49,7 @@ class TimestampDictionaryReader { epochtime_t timestamp, uint64_t format_id, std::string& buffer - ) const { /*NO-OP until follow-up PR*/} + ) const { /*NO-OP until follow-up PR*/ } /** * Gets iterators for the timestamp patterns From 7cbd80d266f493782c535d8887de784d177c3b7c Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:33:03 +0000 Subject: [PATCH 09/12] Revert "Update CMakeLists" This reverts commit ea32aec6a361bdd781d45bb429340c5897db515d. --- components/core/cmake/Options/options.cmake | 3 --- components/core/src/clp_s/CMakeLists.txt | 6 ++---- components/core/src/clp_s/indexer/CMakeLists.txt | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/components/core/cmake/Options/options.cmake b/components/core/cmake/Options/options.cmake index a08403f692..1030fa0a61 100644 --- a/components/core/cmake/Options/options.cmake +++ b/components/core/cmake/Options/options.cmake @@ -132,7 +132,6 @@ function(validate_clp_binaries_dependencies) CLP_BUILD_CLP_S_SEARCH CLP_BUILD_CLP_S_SEARCH_AST CLP_BUILD_CLP_S_SEARCH_KQL - CLP_BUILD_CLP_S_TIMESTAMP_PARSER ) endfunction() @@ -208,7 +207,6 @@ function(validate_clp_s_archivereader_dependencies) CLP_BUILD_CLP_STRING_UTILS CLP_BUILD_CLP_S_CLP_DEPENDENCIES CLP_BUILD_CLP_S_IO - CLP_BUILD_CLP_S_TIMESTAMP_PARSER CLP_BUILD_CLP_S_TIMESTAMPPATTERN ) endfunction() @@ -230,7 +228,6 @@ function(validate_clp_s_archivewriter_dependencies) validate_clp_dependencies_for_target(CLP_BUILD_CLP_S_ARCHIVEWRITER CLP_BUILD_CLP_S_CLP_DEPENDENCIES CLP_BUILD_CLP_S_IO - CLP_BUILD_CLP_S_TIMESTAMP_PARSER CLP_BUILD_CLP_S_TIMESTAMPPATTERN ) endfunction() diff --git a/components/core/src/clp_s/CMakeLists.txt b/components/core/src/clp_s/CMakeLists.txt index 72646ad317..d38c28ade5 100644 --- a/components/core/src/clp_s/CMakeLists.txt +++ b/components/core/src/clp_s/CMakeLists.txt @@ -279,14 +279,13 @@ if(CLP_BUILD_CLP_S_ARCHIVEWRITER) absl::flat_hash_map clp_s::clp_dependencies clp_s::io - clp_s::timestamp_parser - clp_s::timestamp_pattern msgpack-cxx nlohmann_json::nlohmann_json simdjson::simdjson ystdlib::error_handling PRIVATE Boost::url + clp_s::timestamp_pattern ${CURL_LIBRARIES} fmt::fmt spdlog::spdlog @@ -344,14 +343,13 @@ if(CLP_BUILD_CLP_S_ARCHIVEREADER) absl::flat_hash_map clp::string_utils clp_s::io - clp_s::timestamp_parser - clp_s::timestamp_pattern msgpack-cxx nlohmann_json::nlohmann_json ystdlib::error_handling PRIVATE Boost::url clp_s::clp_dependencies + clp_s::timestamp_pattern ${CURL_LIBRARIES} fmt::fmt spdlog::spdlog diff --git a/components/core/src/clp_s/indexer/CMakeLists.txt b/components/core/src/clp_s/indexer/CMakeLists.txt index 4f2334d862..ba21f50a26 100644 --- a/components/core/src/clp_s/indexer/CMakeLists.txt +++ b/components/core/src/clp_s/indexer/CMakeLists.txt @@ -132,7 +132,6 @@ if(CLP_BUILD_EXECUTABLES) Boost::iostreams Boost::program_options Boost::url ${CURL_LIBRARIES} clp::string_utils - clp_s::timestamp_parser date::date MariaDBClient::MariaDBClient msgpack-cxx From dcc25bf99c3fac781580819cb6192bada691509e Mon Sep 17 00:00:00 2001 From: dgibson Date: Thu, 15 Jan 2026 16:50:31 +0000 Subject: [PATCH 10/12] Typo --- components/core/src/clp_s/TimestampDictionaryReader.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp_s/TimestampDictionaryReader.hpp b/components/core/src/clp_s/TimestampDictionaryReader.hpp index ad8260a7e4..2272b43954 100644 --- a/components/core/src/clp_s/TimestampDictionaryReader.hpp +++ b/components/core/src/clp_s/TimestampDictionaryReader.hpp @@ -42,7 +42,7 @@ class TimestampDictionaryReader { * @param timestamp * @param format_id * @param buffer - * @throws OperationFailed is the format indicated by `format_id` can not be interpreted as a + * @throws OperationFailed if the format indicated by `format_id` can not be interpreted as a * `clp_s::timestamp_parser::TimestampPattern`. */ void append_timestamp_to_buffer( From 2989b065171c8fc417264114964e9a3b05e1460c Mon Sep 17 00:00:00 2001 From: Devin Gibson Date: Mon, 19 Jan 2026 12:04:07 -0500 Subject: [PATCH 11/12] Apply suggestions from code review Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- components/core/src/clp_s/ColumnReader.cpp | 5 ++--- components/core/src/clp_s/ColumnReader.hpp | 11 +++++------ components/core/src/clp_s/ColumnWriter.cpp | 6 +++--- components/core/src/clp_s/ColumnWriter.hpp | 2 +- .../core/src/clp_s/TimestampDictionaryReader.hpp | 2 +- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp index c041ab7354..41abed057e 100644 --- a/components/core/src/clp_s/ColumnReader.cpp +++ b/components/core/src/clp_s/ColumnReader.cpp @@ -259,9 +259,8 @@ void TimestampColumnReader::load(BufferViewReader& reader, uint64_t num_messages m_timestamp_encodings = reader.read_unaligned_span(num_messages); } -std::variant TimestampColumnReader::extract_value( - uint64_t cur_message -) { +auto TimestampColumnReader::extract_value(uint64_t cur_message) + -> std::variant { std::string ret; m_timestamp_dict->append_timestamp_to_buffer( m_timestamps.get_value_at_idx(cur_message), diff --git a/components/core/src/clp_s/ColumnReader.hpp b/components/core/src/clp_s/ColumnReader.hpp index 591b9bd8a6..d1138d5c96 100644 --- a/components/core/src/clp_s/ColumnReader.hpp +++ b/components/core/src/clp_s/ColumnReader.hpp @@ -365,8 +365,8 @@ class TimestampColumnReader : public BaseColumnReader { public: // Constructor TimestampColumnReader(int32_t id, std::shared_ptr timestamp_dict) - : BaseColumnReader(id), - m_timestamp_dict(std::move(timestamp_dict)), + : BaseColumnReader{id}, + m_timestamp_dict{std::move(timestamp_dict)}, m_timestamps{id} {} // Destructor @@ -375,11 +375,10 @@ class TimestampColumnReader : public BaseColumnReader { // Methods inherited from BaseColumnReader void load(BufferViewReader& reader, uint64_t num_messages) override; - NodeType get_type() override { return NodeType::Timestamp; } + auto get_type() -> NodeType override { return NodeType::Timestamp; } - std::variant extract_value( - uint64_t cur_message - ) override; + auto extract_value(uint64_t cur_message) + -> std::variant override; void extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) override; diff --git a/components/core/src/clp_s/ColumnWriter.cpp b/components/core/src/clp_s/ColumnWriter.cpp index 85e86f86f8..5b888f436d 100644 --- a/components/core/src/clp_s/ColumnWriter.cpp +++ b/components/core/src/clp_s/ColumnWriter.cpp @@ -186,9 +186,9 @@ void DateStringColumnWriter::store(ZstdCompressor& compressor) { } auto TimestampColumnWriter::add_value(ParsedMessage::variable_t& value) -> size_t { - auto const& encoded_timestamp = std::get>(value); - auto const encoded_timestamp_size{m_timestamps.add_value(encoded_timestamp.first)}; - m_timestamp_encodings.emplace_back(encoded_timestamp.second); + auto const [timestamp, encoding] = std::get>(value); + auto const encoded_timestamp_size{m_timestamps.add_value(timestamp)}; + m_timestamp_encodings.emplace_back(encoding); return encoded_timestamp_size + sizeof(uint64_t); } diff --git a/components/core/src/clp_s/ColumnWriter.hpp b/components/core/src/clp_s/ColumnWriter.hpp index 58beda3517..8b32f23669 100644 --- a/components/core/src/clp_s/ColumnWriter.hpp +++ b/components/core/src/clp_s/ColumnWriter.hpp @@ -263,7 +263,7 @@ class DateStringColumnWriter : public BaseColumnWriter { class TimestampColumnWriter : public BaseColumnWriter { public: // Constructor - explicit TimestampColumnWriter(int32_t id) : BaseColumnWriter(id), m_timestamps{id} {} + explicit TimestampColumnWriter(int32_t id) : BaseColumnWriter{id}, m_timestamps{id} {} // Destructor ~TimestampColumnWriter() override = default; diff --git a/components/core/src/clp_s/TimestampDictionaryReader.hpp b/components/core/src/clp_s/TimestampDictionaryReader.hpp index 2272b43954..0a0a2665f5 100644 --- a/components/core/src/clp_s/TimestampDictionaryReader.hpp +++ b/components/core/src/clp_s/TimestampDictionaryReader.hpp @@ -42,7 +42,7 @@ class TimestampDictionaryReader { * @param timestamp * @param format_id * @param buffer - * @throws OperationFailed if the format indicated by `format_id` can not be interpreted as a + * @throws OperationFailed if the format indicated by `format_id` cannot be interpreted as a * `clp_s::timestamp_parser::TimestampPattern`. */ void append_timestamp_to_buffer( From 0ab13ab36a79a9a584c66e08acf06336cc06c244 Mon Sep 17 00:00:00 2001 From: dgibson Date: Mon, 19 Jan 2026 17:11:14 +0000 Subject: [PATCH 12/12] Simplify DeltaEncodedInt64ColumnWriter::add_value per review comment. --- components/core/src/clp_s/ColumnWriter.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/components/core/src/clp_s/ColumnWriter.cpp b/components/core/src/clp_s/ColumnWriter.cpp index 5b888f436d..b0ec27fe05 100644 --- a/components/core/src/clp_s/ColumnWriter.cpp +++ b/components/core/src/clp_s/ColumnWriter.cpp @@ -29,13 +29,8 @@ void Int64ColumnWriter::store(ZstdCompressor& compressor) { } auto DeltaEncodedInt64ColumnWriter::add_value(int64_t value) -> size_t { - if (0 == m_values.size()) { - m_cur = value; - m_values.emplace_back(m_cur); - } else { - m_values.emplace_back(value - m_cur); - m_cur = value; - } + m_values.emplace_back(value - m_cur); + m_cur = value; return sizeof(int64_t); }