Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
879e8b9
Remove special cases for indexing the timestamp column.
gibber9809 Dec 16, 2025
a0bffeb
Expose heuristic for estimating precision of unknown-precision timest…
gibber9809 Dec 16, 2025
28f7261
Remove TimestampPattern::init method and related global.
gibber9809 Dec 16, 2025
7a04767
Simplify timestamp dictionary readers and writers.
gibber9809 Dec 16, 2025
16512a2
Add Timestamp MPT node.
gibber9809 Dec 16, 2025
7435780
Add reader and writer for Timestamp column.
gibber9809 Dec 16, 2025
0888f4b
Add write-side changes for parsing timestamps into Timestamp column.
gibber9809 Dec 16, 2025
8ce2373
Implement read-side changes for new Timestamp column.
gibber9809 Dec 16, 2025
f08307f
Update cmake build.
gibber9809 Dec 16, 2025
7f03234
Update tests to handle TimestampColumn
gibber9809 Dec 16, 2025
847c6e1
Rename DateString to DeprecatedDateString in comment in webui code.
gibber9809 Dec 16, 2025
d8089e8
Merge branch 'main' into clp-s-add-timestamp-column
jackluo923 Dec 21, 2025
b4a1e5b
Merge branch 'main' into clp-s-add-timestamp-column
jackluo923 Jan 8, 2026
8327233
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Jan 14, 2026
6049808
Simplify string timestamp ingestion using new quoted pattern special …
gibber9809 Jan 14, 2026
c8505e4
Get rid of assume_value usage
gibber9809 Jan 20, 2026
084a8c2
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Jan 20, 2026
6130b00
Add basic test for timestamp round-tripping.
gibber9809 Jan 20, 2026
24240f4
Lint fix.
gibber9809 Jan 20, 2026
d2fb8e2
Fix scaling per rabbit comment.
gibber9809 Jan 20, 2026
18c1751
Fix clear() per rabbit comment.
gibber9809 Jan 20, 2026
8a4d93d
Merge branch 'main' into clp-s-add-timestamp-column
junhaoliao Jan 22, 2026
9c2c3ac
Apply suggestions from code review
gibber9809 Jan 23, 2026
6ae885c
Fix compilation error; minor renaming.
gibber9809 Jan 23, 2026
0a3e1bb
Renaming suggested in review comment.
gibber9809 Jan 23, 2026
6671150
Make generated_pattern a member of TimestampDictionaryWriter
gibber9809 Jan 23, 2026
aa2766d
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Jan 23, 2026
aaf738f
Use try_emplace in one more place.
gibber9809 Jan 23, 2026
858f1c6
Address rabbit comment.
gibber9809 Jan 23, 2026
558ed39
Apply suggestions from code review
gibber9809 Jan 26, 2026
8023508
Update comments per code review.
gibber9809 Jan 26, 2026
190e843
Remove throw, per review comment.
gibber9809 Jan 26, 2026
9be9f90
Update outdated docstrings for get_{begin,end}_timestamp
gibber9809 Jan 26, 2026
987c994
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Jan 26, 2026
8e7b403
Apply suggestions from code review
gibber9809 Jan 28, 2026
a24d9d8
Address some review comments in TimestampDictionaryReader.cpp
gibber9809 Jan 29, 2026
d42ceac
Address review comment.
gibber9809 Jan 29, 2026
dd58b56
Use newly defined constant for first version with new timestamp format…
gibber9809 Jan 29, 2026
b86a3e0
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Jan 29, 2026
1681a6a
Go back to communicating timestamps in millisecond precision when ass…
gibber9809 Jan 29, 2026
a29d01a
Apply suggestions from code review
gibber9809 Feb 7, 2026
2fd0349
Merge branch 'main' into clp-s-add-timestamp-column
gibber9809 Feb 7, 2026
b2fd04f
Clean up checks for deprecated timestamp version, per review comments.
gibber9809 Feb 7, 2026
5e5a4c8
Update sql for getting timestamp key for presto webui.
gibber9809 Feb 7, 2026
3bc858d
Fix bug causing column ID for the authoritative timestamp column to n…
gibber9809 Feb 9, 2026
816010d
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Feb 9, 2026
aea7667
Apply suggestions from code review
gibber9809 Feb 10, 2026
8fc35ae
Merge branch 'main' into clp-s-add-timestamp-column
gibber9809 Feb 10, 2026
d7cbeea
Fix format
gibber9809 Feb 10, 2026
e3d8868
Address QueryRunner review comment.
gibber9809 Feb 11, 2026
25a1703
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Feb 11, 2026
65cbef2
Merge remote-tracking branch 'upstream/main' into clp-s-add-timestamp-column
gibber9809 Feb 12, 2026
cdec0d2
Merge branch 'main' into clp-s-add-timestamp-column
junhaoliao Feb 17, 2026
52779b5
Merge branch 'main' into clp-s-add-timestamp-column
gibber9809 Feb 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions components/core/cmake/Options/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ function(validate_clp_binaries_dependencies)
CLP_BUILD_CLP_S_SEARCH
CLP_BUILD_CLP_S_SEARCH_AST
CLP_BUILD_CLP_S_SEARCH_KQL
CLP_BUILD_CLP_S_TIMESTAMP_PARSER
)
endfunction()

Expand Down Expand Up @@ -207,6 +208,7 @@ function(validate_clp_s_archivereader_dependencies)
CLP_BUILD_CLP_STRING_UTILS
CLP_BUILD_CLP_S_CLP_DEPENDENCIES
CLP_BUILD_CLP_S_IO
CLP_BUILD_CLP_S_TIMESTAMP_PARSER
CLP_BUILD_CLP_S_TIMESTAMPPATTERN
)
endfunction()
Expand All @@ -228,6 +230,7 @@ function(validate_clp_s_archivewriter_dependencies)
validate_clp_dependencies_for_target(CLP_BUILD_CLP_S_ARCHIVEWRITER
CLP_BUILD_CLP_S_CLP_DEPENDENCIES
CLP_BUILD_CLP_S_IO
CLP_BUILD_CLP_S_TIMESTAMP_PARSER
CLP_BUILD_CLP_S_TIMESTAMPPATTERN
)
endfunction()
Expand Down
15 changes: 10 additions & 5 deletions components/core/src/clp_s/ArchiveReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,12 @@ BaseColumnReader* ArchiveReader::append_reader_column(SchemaReader& reader, int3
case NodeType::UnstructuredArray:
column_reader = new ClpStringColumnReader(column_id, m_var_dict, m_array_dict, true);
break;
case NodeType::DateString:
column_reader = new DateStringColumnReader(column_id, get_timestamp_dictionary());
case NodeType::DeprecatedDateString:
column_reader
= new DeprecatedDateStringColumnReader(column_id, get_timestamp_dictionary());
break;
case NodeType::Timestamp:
column_reader = new TimestampColumnReader(column_id, get_timestamp_dictionary());
break;
// No need to push columns without associated object readers into the SchemaReader.
case NodeType::Metadata:
Expand Down Expand Up @@ -268,10 +272,11 @@ void ArchiveReader::append_unordered_reader_columns(
case NodeType::Boolean:
column_reader = new BooleanColumnReader(column_id);
break;
// UnstructuredArray and DateString currently aren't supported as part of any unordered
// object, so we disregard them here
// UnstructuredArray, DeprecatedDateString, and Timestamp currently aren't supported as
// part of any unordered object, so we disregard them here
case NodeType::UnstructuredArray:
case NodeType::DateString:
case NodeType::DeprecatedDateString:
case NodeType::Timestamp:
// No need to push columns without associated object readers into the SchemaReader.
case NodeType::StructuredArray:
case NodeType::Object:
Expand Down
8 changes: 8 additions & 0 deletions components/core/src/clp_s/ArchiveReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ class ArchiveReader {
*/
bool has_log_order() { return m_log_event_idx_column_id >= 0; }

/**
* Convenience accessor that forwards to `has_deprecated_timestamp_format()` on the object returned
* by `get_header()`, so callers do not need to fetch the header themselves.
* @return Whether this archive can contain columns with the deprecated DateString timestamp
* format.
*/
[[nodiscard]] auto has_deprecated_timestamp_format() const -> bool {
return get_header().has_deprecated_timestamp_format();
}

private:
/**
* Initializes a schema reader passed by reference to become a reader for a given schema.
Expand Down
5 changes: 4 additions & 1 deletion components/core/src/clp_s/ArchiveReaderAdaptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ ArchiveReaderAdaptor::try_read_archive_file_info(ZstdDecompressor& decompressor,

ErrorCode
ArchiveReaderAdaptor::try_read_timestamp_dictionary(ZstdDecompressor& decompressor, size_t size) {
return m_timestamp_dictionary->read(decompressor);
return m_timestamp_dictionary->read(
decompressor,
m_archive_header.has_deprecated_timestamp_format()
);
}

ErrorCode ArchiveReaderAdaptor::try_read_archive_info(ZstdDecompressor& decompressor, size_t size) {
Expand Down
7 changes: 4 additions & 3 deletions components/core/src/clp_s/ArchiveWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,13 @@ void ArchiveWriter::initialize_schema_writer(SchemaWriter* writer, Schema const&
std::make_unique<ClpStringColumnWriter>(id, m_var_dict, m_array_dict)
);
break;
case NodeType::DateString:
writer->append_column(std::make_unique<DateStringColumnWriter>(id));
break;
case NodeType::DeltaInteger:
writer->append_column(std::make_unique<DeltaEncodedInt64ColumnWriter>(id));
break;
case NodeType::Timestamp:
writer->append_column(std::make_unique<TimestampColumnWriter>(id));
break;
case NodeType::DeprecatedDateString:
case NodeType::Metadata:
case NodeType::NullValue:
case NodeType::Object:
Expand Down
41 changes: 28 additions & 13 deletions components/core/src/clp_s/ArchiveWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,34 +193,49 @@ class ArchiveWriter {
int32_t add_schema(Schema const& schema) { return m_schema_map.add_schema(schema); }

/**
* Ingests a timestamp entry from a string
* Ingests a timestamp entry from a string.
* @param key
* @param node_id
* @param timestamp
* @param pattern_id
* @return the epoch time corresponding to the string timestamp
* @param is_json_literal
* @return Forwards `TimestampDictionaryWriter::ingest_string_timestamp`'s return values.
*/
epochtime_t ingest_timestamp_entry(
[[nodiscard]] auto ingest_string_timestamp(
std::string_view key,
int32_t node_id,
std::string_view timestamp,
uint64_t& pattern_id
) {
return m_timestamp_dict.ingest_entry(key, node_id, timestamp, pattern_id);
bool is_json_literal
) -> std::pair<epochtime_t, uint64_t> {
return m_timestamp_dict.ingest_string_timestamp(key, node_id, timestamp, is_json_literal);
}

/**
* Ingests a timestamp entry from a number
* @param column_key
* Ingests a timestamp from a numeric JSON entry.
* @param key
* @param node_id
* @param timestamp
* @return Forwards `TimestampDictionaryWriter::ingest_numeric_json_timestamp`'s return values.
*/
void ingest_timestamp_entry(std::string_view key, int32_t node_id, double timestamp) {
m_timestamp_dict.ingest_entry(key, node_id, timestamp);
[[nodiscard]] auto
ingest_numeric_json_timestamp(std::string_view key, int32_t node_id, std::string_view timestamp)
-> std::pair<epochtime_t, uint64_t> {
return m_timestamp_dict.ingest_numeric_json_timestamp(key, node_id, timestamp);
}

void ingest_timestamp_entry(std::string_view key, int32_t node_id, int64_t timestamp) {
m_timestamp_dict.ingest_entry(key, node_id, timestamp);
/**
* Ingests an unknown precision epoch timestamp.
* @param key
* @param node_id
* @param timestamp
* @return Forwards `TimestampDictionaryWriter::ingest_unknown_precision_epoch_timestamp`'s
* return values.
*/
[[nodiscard]] auto ingest_unknown_precision_epoch_timestamp(
std::string_view key,
int32_t node_id,
int64_t timestamp
) -> std::pair<epochtime_t, uint64_t> {
return m_timestamp_dict.ingest_unknown_precision_epoch_timestamp(key, node_id, timestamp);
}

/**
Expand Down
6 changes: 4 additions & 2 deletions components/core/src/clp_s/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,14 @@ if(CLP_BUILD_CLP_S_ARCHIVEWRITER)
absl::flat_hash_map
clp_s::clp_dependencies
clp_s::io
clp_s::timestamp_parser
clp_s::timestamp_pattern
msgpack-cxx
nlohmann_json::nlohmann_json
simdjson::simdjson
ystdlib::error_handling
PRIVATE
Boost::url
clp_s::timestamp_pattern
${CURL_LIBRARIES}
fmt::fmt
spdlog::spdlog
Expand Down Expand Up @@ -345,13 +346,14 @@ if(CLP_BUILD_CLP_S_ARCHIVEREADER)
absl::flat_hash_map
clp::string_utils
clp_s::io
clp_s::timestamp_parser
clp_s::timestamp_pattern
msgpack-cxx
nlohmann_json::nlohmann_json
ystdlib::error_handling
PRIVATE
Boost::url
clp_s::clp_dependencies
clp_s::timestamp_pattern
${CURL_LIBRARIES}
fmt::fmt
spdlog::spdlog
Expand Down
12 changes: 6 additions & 6 deletions components/core/src/clp_s/ColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,31 +226,31 @@ int64_t VariableStringColumnReader::get_variable_id(uint64_t cur_message) {
return m_variables[cur_message];
}

void DateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
void DeprecatedDateStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
m_timestamps = reader.read_unaligned_span<int64_t>(num_messages);
m_timestamp_encodings = reader.read_unaligned_span<int64_t>(num_messages);
}

std::variant<int64_t, double, std::string, uint8_t> DateStringColumnReader::extract_value(
std::variant<int64_t, double, std::string, uint8_t> DeprecatedDateStringColumnReader::extract_value(
uint64_t cur_message
) {
return m_timestamp_dict->get_string_encoding(
return m_timestamp_dict->get_deprecated_timestamp_string_encoding(
m_timestamps[cur_message],
m_timestamp_encodings[cur_message]
);
}

void DateStringColumnReader::extract_string_value_into_buffer(
void DeprecatedDateStringColumnReader::extract_string_value_into_buffer(
uint64_t cur_message,
std::string& buffer
) {
buffer.append(m_timestamp_dict->get_string_encoding(
buffer.append(m_timestamp_dict->get_deprecated_timestamp_string_encoding(
m_timestamps[cur_message],
m_timestamp_encodings[cur_message]
));
}

epochtime_t DateStringColumnReader::get_encoded_time(uint64_t cur_message) {
epochtime_t DeprecatedDateStringColumnReader::get_encoded_time(uint64_t cur_message) {
return m_timestamps[cur_message];
}

Expand Down
11 changes: 7 additions & 4 deletions components/core/src/clp_s/ColumnReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,20 +327,23 @@ class VariableStringColumnReader : public BaseColumnReader {
UnalignedMemSpan<uint64_t> m_variables;
};

class DateStringColumnReader : public BaseColumnReader {
class DeprecatedDateStringColumnReader : public BaseColumnReader {
public:
// Constructor
DateStringColumnReader(int32_t id, std::shared_ptr<TimestampDictionaryReader> timestamp_dict)
DeprecatedDateStringColumnReader(
int32_t id,
std::shared_ptr<TimestampDictionaryReader> timestamp_dict
)
: BaseColumnReader(id),
m_timestamp_dict(std::move(timestamp_dict)) {}

// Destructor
~DateStringColumnReader() override = default;
~DeprecatedDateStringColumnReader() override = default;

// Methods inherited from BaseColumnReader
void load(BufferViewReader& reader, uint64_t num_messages) override;

NodeType get_type() override { return NodeType::DateString; }
NodeType get_type() override { return NodeType::DeprecatedDateString; }

std::variant<int64_t, double, std::string, uint8_t> extract_value(
uint64_t cur_message
Expand Down
15 changes: 0 additions & 15 deletions components/core/src/clp_s/ColumnWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,21 +165,6 @@ void VariableStringColumnWriter::store(ZstdCompressor& compressor) {
compressor.write(reinterpret_cast<char const*>(m_var_dict_ids.data()), size);
}

size_t DateStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
auto encoded_timestamp = std::get<std::pair<uint64_t, epochtime_t>>(value);
m_timestamps.push_back(encoded_timestamp.second);
m_timestamp_encodings.push_back(encoded_timestamp.first);
return 2 * sizeof(int64_t);
;
}

void DateStringColumnWriter::store(ZstdCompressor& compressor) {
size_t timestamps_size = m_timestamps.size() * sizeof(int64_t);
compressor.write(reinterpret_cast<char const*>(m_timestamps.data()), timestamps_size);
size_t encodings_size = m_timestamp_encodings.size() * sizeof(int64_t);
compressor.write(reinterpret_cast<char const*>(m_timestamp_encodings.data()), encodings_size);
}

auto TimestampColumnWriter::add_value(ParsedMessage::variable_t& value) -> size_t {
auto const [timestamp, encoding] = std::get<std::pair<epochtime_t, uint64_t>>(value);
auto const encoded_timestamp_size{m_timestamps.add_value(timestamp)};
Expand Down
18 changes: 0 additions & 18 deletions components/core/src/clp_s/ColumnWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,24 +242,6 @@ class VariableStringColumnWriter : public BaseColumnWriter {
std::vector<clp::variable_dictionary_id_t> m_var_dict_ids;
};

class DateStringColumnWriter : public BaseColumnWriter {
public:
// Constructor
explicit DateStringColumnWriter(int32_t id) : BaseColumnWriter(id) {}

// Destructor
~DateStringColumnWriter() override = default;

// Methods inherited from BaseColumnWriter
size_t add_value(ParsedMessage::variable_t& value) override;

void store(ZstdCompressor& compressor) override;

private:
std::vector<int64_t> m_timestamps;
std::vector<int64_t> m_timestamp_encodings;
};

class TimestampColumnWriter : public BaseColumnWriter {
public:
// Constructor
Expand Down
Loading
Loading