Skip to content

Commit e69a34a

Browse files
committed
Add option to disable log order, and respond to other review comments
1 parent 2d1b76f commit e69a34a

File tree

6 files changed

+38
-15
lines changed

6 files changed

+38
-15
lines changed

components/core/src/clp_s/CommandLineArguments.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,15 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
189189
)(
190190
"print-archive-stats",
191191
po::bool_switch(&m_print_archive_stats),
192-
"Print statistics (json) about the archive after it's compressed."
192+
"Print statistics (json) about the archive after it's compressed."
193193
)(
194194
"structurize-arrays",
195195
po::bool_switch(&m_structurize_arrays),
196196
"Structurize arrays instead of compressing them as clp strings."
197+
)(
198+
"disable-log-order",
199+
po::bool_switch(&m_no_record_log_order),
200+
"Do not record log order at ingestion time."
197201
);
198202
// clang-format on
199203

components/core/src/clp_s/CommandLineArguments.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ class CommandLineArguments {
112112

113113
std::vector<std::string> const& get_projection_columns() const { return m_projection_columns; }
114114

115+
bool get_record_log_order() const { return false == m_no_record_log_order; }
116+
115117
private:
116118
// Methods
117119
/**
@@ -178,6 +180,7 @@ class CommandLineArguments {
178180
bool m_ordered_decompression{false};
179181
size_t m_ordered_chunk_size{0};
180182
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
183+
bool m_no_record_log_order{false};
181184

182185
// Metadata db variables
183186
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;

components/core/src/clp_s/JsonParser.cpp

+20-7
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ JsonParser::JsonParser(JsonParserOption const& option)
1515
m_target_encoded_size(option.target_encoded_size),
1616
m_max_document_size(option.max_document_size),
1717
m_timestamp_key(option.timestamp_key),
18-
m_structurize_arrays(option.structurize_arrays) {
18+
m_structurize_arrays(option.structurize_arrays),
19+
m_record_log_order(option.record_log_order) {
1920
if (false == FileUtils::validate_path(option.file_paths)) {
2021
exit(1);
2122
}
@@ -447,6 +448,16 @@ bool JsonParser::parse() {
447448
m_num_messages = 0;
448449
size_t bytes_consumed_up_to_prev_archive = 0;
449450
size_t bytes_consumed_up_to_prev_record = 0;
451+
452+
int32_t log_event_idx_node_id{};
453+
auto add_log_event_idx_node = [&]() {
454+
if (m_record_log_order) {
455+
log_event_idx_node_id
456+
= add_metadata_field(constants::cLogEventIdxName, NodeType::Integer);
457+
}
458+
};
459+
add_log_event_idx_node();
460+
450461
while (json_file_iterator.get_json(json_it)) {
451462
m_current_schema.clear();
452463

@@ -468,12 +479,13 @@ bool JsonParser::parse() {
468479
}
469480

470481
// Add log_event_idx field to metadata for record
471-
auto log_event_idx = add_metadata_field(constants::cLogEventIdxName, NodeType::Integer);
472-
m_current_parsed_message.add_value(
473-
log_event_idx,
474-
m_archive_writer->get_next_log_event_id()
475-
);
476-
m_current_schema.insert_ordered(log_event_idx);
482+
if (m_record_log_order) {
483+
m_current_parsed_message.add_value(
484+
log_event_idx_node_id,
485+
m_archive_writer->get_next_log_event_id()
486+
);
487+
m_current_schema.insert_ordered(log_event_idx_node_id);
488+
}
477489

478490
// Some errors from simdjson are latent until trying to access invalid JSON fields.
479491
// Instead of checking for an error every time we access a JSON field in parse_line we
@@ -504,6 +516,7 @@ bool JsonParser::parse() {
504516
);
505517
bytes_consumed_up_to_prev_archive = bytes_consumed_up_to_prev_record;
506518
split_archive();
519+
add_log_event_idx_node();
507520
}
508521

509522
m_current_parsed_message.clear();

components/core/src/clp_s/JsonParser.hpp

+8-6
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@ struct JsonParserOption {
3131
std::vector<std::string> file_paths;
3232
std::string timestamp_key;
3333
std::string archives_dir;
34-
size_t target_encoded_size;
35-
size_t max_document_size;
36-
size_t min_table_size;
37-
int compression_level;
38-
bool print_archive_stats;
39-
bool structurize_arrays;
34+
size_t target_encoded_size{};
35+
size_t max_document_size{};
36+
size_t min_table_size{};
37+
int compression_level{};
38+
bool print_archive_stats{};
39+
bool structurize_arrays{};
40+
bool record_log_order{true};
4041
std::shared_ptr<clp::GlobalMySQLMetadataDB> metadata_db;
4142
};
4243

@@ -118,6 +119,7 @@ class JsonParser {
118119
size_t m_target_encoded_size;
119120
size_t m_max_document_size;
120121
bool m_structurize_arrays{false};
122+
bool m_record_log_order{true};
121123
};
122124
} // namespace clp_s
123125

components/core/src/clp_s/SchemaTree.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ class SchemaTree {
127127
}
128128

129129
/**
130-
* @return the Id of the root of the Object sub-tree.
130+
* @return the Id of the root of the Object sub-tree that records the structure of JSON data.
131131
* @return -1 if the Object sub-tree does not exist.
132132
*/
133133
int32_t get_object_subtree_node_id() const { return m_object_subtree_id; }

components/core/src/clp_s/clp-s.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ bool compress(CommandLineArguments const& command_line_arguments) {
9696
option.timestamp_key = command_line_arguments.get_timestamp_key();
9797
option.print_archive_stats = command_line_arguments.print_archive_stats();
9898
option.structurize_arrays = command_line_arguments.get_structurize_arrays();
99+
option.record_log_order = command_line_arguments.get_record_log_order();
99100

100101
auto const& db_config_container = command_line_arguments.get_metadata_db_config();
101102
if (db_config_container.has_value()) {

0 commit comments

Comments
 (0)