Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
gibber9809 committed Nov 17, 2024
1 parent bdbaed2 commit 2d1b76f
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 41 deletions.
2 changes: 1 addition & 1 deletion components/core/src/clp_s/ArchiveReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void ArchiveReader::open(string_view archives_dir, string_view archive_id) {
m_schema_tree = ReaderUtils::read_schema_tree(archive_path_str);
m_schema_map = ReaderUtils::read_schemas(archive_path_str);

m_log_event_idx_column_id = m_schema_tree->get_internal_field_id(constants::cLogEventIdxName);
m_log_event_idx_column_id = m_schema_tree->get_metadata_field_id(constants::cLogEventIdxName);

m_table_metadata_file_reader.open(archive_path_str + constants::cArchiveTableMetadataFile);
m_stream_reader.open_packed_streams(archive_path_str + constants::cArchiveTablesFile);
Expand Down
14 changes: 7 additions & 7 deletions components/core/src/clp_s/JsonParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,8 @@ bool JsonParser::parse() {
return false;
}

// Add internal log_event_idx field to record
auto log_event_idx = add_internal_field(constants::cLogEventIdxName, NodeType::Integer);
// Add log_event_idx field to metadata for record
auto log_event_idx = add_metadata_field(constants::cLogEventIdxName, NodeType::Integer);
m_current_parsed_message.add_value(
log_event_idx,
m_archive_writer->get_next_log_event_id()
Expand Down Expand Up @@ -534,13 +534,13 @@ bool JsonParser::parse() {
return true;
}

int32_t JsonParser::add_internal_field(std::string_view const field_name, NodeType type) {
auto internal_subtree_id = m_archive_writer->add_node(
int32_t JsonParser::add_metadata_field(std::string_view const field_name, NodeType type) {
auto metadata_subtree_id = m_archive_writer->add_node(
constants::cRootNodeId,
NodeType::Internal,
constants::cInternalSubtreeName
NodeType::Metadata,
constants::cMetadataSubtreeName
);
return m_archive_writer->add_node(internal_subtree_id, type, field_name);
return m_archive_writer->add_node(metadata_subtree_id, type, field_name);
}

void JsonParser::store() {
Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class JsonParser {
* Note: this method should be called before parsing a record so that metadata fields come first
* in each table. This isn't strictly necessary, but it is a nice convention.
*/
int32_t add_internal_field(std::string_view const field_name, NodeType type);
int32_t add_metadata_field(std::string_view const field_name, NodeType type);

int m_num_messages;
std::vector<std::string> m_file_paths;
Expand Down
12 changes: 6 additions & 6 deletions components/core/src/clp_s/SchemaTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string_
if (constants::cRootNodeId == parent_node_id) {
if (NodeType::Object == type) {
m_object_subtree_id = node_id;
} else if (NodeType::Internal == type) {
m_internal_subtree_id = node_id;
} else if (NodeType::Metadata == type) {
m_metadata_subtree_id = node_id;
}
}

Expand All @@ -34,13 +34,13 @@ int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string_
return node_id;
}

int32_t SchemaTree::get_internal_field_id(std::string_view const field_name) {
if (m_internal_subtree_id < 0) {
int32_t SchemaTree::get_metadata_field_id(std::string_view const field_name) {
if (m_metadata_subtree_id < 0) {
return -1;
}

auto& internal_subtree_node = m_nodes[m_internal_subtree_id];
for (auto child_id : internal_subtree_node.get_children_ids()) {
auto& metadata_subtree_node = m_nodes[m_metadata_subtree_id];
for (auto child_id : metadata_subtree_node.get_children_ids()) {
auto& child_node = m_nodes[child_id];
if (child_node.get_key_name() == field_name) {
return child_id;
Expand Down
35 changes: 24 additions & 11 deletions components/core/src/clp_s/SchemaTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
#include <absl/container/flat_hash_map.h>

namespace clp_s {
/**
* This enum defines the valid MPT node types as well as the 8-bit number used to encode them.
*
* The number used to represent each node type cannot change. That means that elements in this
* enum can never be reordered and that new node types always need to be added to the end of the
* enum (but before Unknown).
*
* Node types are used to help record the structure of a log record, with the exception of the
* "Metadata" node type. The "Metadata" type is a special type used by the implementation to
* demarcate data needed by the implementation that is not part of the log record. In particular,
* the implementation may create a special subtree of the MPT which contains fields used to record
* things like original log order.
*/
enum class NodeType : uint8_t {
Integer,
Float,
Expand All @@ -22,7 +35,7 @@ enum class NodeType : uint8_t {
NullValue,
DateString,
StructuredArray,
Internal,
Metadata,
Unknown = std::underlying_type<NodeType>::type(~0ULL)
};

Expand All @@ -46,12 +59,12 @@ class SchemaNode {
)
: m_parent_id(parent_id),
m_id(id),
m_key_buf(std::make_unique<char[]>(key_name.size())),
m_key_name(m_key_buf.get(), key_name.size()),
m_key_name_buf(std::make_unique<char[]>(key_name.size())),
m_key_name(m_key_name_buf.get(), key_name.size()),
m_type(type),
m_count(0),
m_depth(depth) {
memcpy(m_key_buf.get(), key_name.begin(), key_name.size());
memcpy(m_key_name_buf.get(), key_name.begin(), key_name.size());
}

/**
Expand Down Expand Up @@ -120,18 +133,18 @@ class SchemaTree {
int32_t get_object_subtree_node_id() const { return m_object_subtree_id; }

/**
* Get the field Id for a specified field within the Internal subtree.
* Get the field Id for a specified field within the Metadata subtree.
* @param field_name
*
* @return the field Id if the field exists within the Internal sub-tree, -1 otherwise.
* @return the field Id if the field exists within the Metadata sub-tree, -1 otherwise.
*/
int32_t get_internal_field_id(std::string_view const field_name);
int32_t get_metadata_field_id(std::string_view const field_name);

/**
* @return the Id of the root of the Internal sub-tree.
* @return -1 if the Internal sub-tree does not exist.
* @return the Id of the root of the Metadata sub-tree.
* @return -1 if the Metadata sub-tree does not exist.
*/
int32_t get_internal_subtree_node_id() { return m_internal_subtree_id; }
int32_t get_metadata_subtree_node_id() { return m_metadata_subtree_id; }

/**
 * @return a read-only reference to the vector of nodes stored in this tree (m_nodes).
 */
std::vector<SchemaNode> const& get_nodes() const { return m_nodes; }

Expand Down Expand Up @@ -169,7 +182,7 @@ class SchemaTree {
std::vector<SchemaNode> m_nodes;
absl::flat_hash_map<std::tuple<int32_t, std::string_view const, NodeType>, int32_t> m_node_map;
int32_t m_object_subtree_id{-1};
int32_t m_internal_subtree_id{-1};
int32_t m_metadata_subtree_id{-1};
};
} // namespace clp_s

Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp_s/archive_constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ constexpr char cArchiveVarDictFile[] = "/var.dict";
// Schema tree constants
constexpr char cRootNodeName[] = "";
constexpr int32_t cRootNodeId = -1;
constexpr char cInternalSubtreeName[] = "";
constexpr char cMetadataSubtreeName[] = "";
constexpr char cLogEventIdxName[] = "log_event_idx";

namespace results_cache::decompression {
Expand Down
26 changes: 14 additions & 12 deletions components/core/src/clp_s/search/Output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,14 @@ void Output::init(

for (auto column_reader : column_readers) {
auto column_id = column_reader->get_id();
if (((0
!= (m_wildcard_type_mask
& node_to_literal_type(m_schema_tree->get_node(column_id).get_type())))
|| m_match.schema_searches_against_column(schema_id, column_id))
&& 0 == m_internal_columns.count(column_id))
if ((0
!= (m_wildcard_type_mask
& node_to_literal_type(m_schema_tree->get_node(column_id).get_type())))
|| m_match.schema_searches_against_column(schema_id, column_id))
{
if (0 != m_metadata_columns.count(column_id)) {
continue;
}
ClpStringColumnReader* clp_reader = dynamic_cast<ClpStringColumnReader*>(column_reader);
VariableStringColumnReader* var_reader
= dynamic_cast<VariableStringColumnReader*>(column_reader);
Expand Down Expand Up @@ -963,15 +965,15 @@ void Output::populate_string_queries(std::shared_ptr<Expression> const& expr) {
}

void Output::populate_internal_columns() {
int32_t internal_subtree_root_node_id = m_schema_tree->get_internal_subtree_node_id();
if (-1 == internal_subtree_root_node_id) {
int32_t metadata_subtree_root_node_id = m_schema_tree->get_metadata_subtree_node_id();
if (-1 == metadata_subtree_root_node_id) {
return;
}

// This code assumes that the internal subtree contains no nested structures
auto& internal_node = m_schema_tree->get_node(internal_subtree_root_node_id);
for (auto child_id : internal_node.get_children_ids()) {
m_internal_columns.insert(child_id);
// This code assumes that the metadata subtree contains no nested structures
auto& metadata_node = m_schema_tree->get_node(metadata_subtree_root_node_id);
for (auto child_id : metadata_node.get_children_ids()) {
m_metadata_columns.insert(child_id);
}
}

Expand All @@ -991,7 +993,7 @@ void Output::populate_searched_wildcard_columns(std::shared_ptr<Expression> cons
if (Schema::schema_entry_is_unordered_object(node)) {
continue;
}
if (0 != m_internal_columns.count(node)) {
if (0 != m_metadata_columns.count(node)) {
continue;
}
auto tree_node_type = m_schema_tree->get_node(node).get_type();
Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp_s/search/Output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Output : public FilterClass {
std::vector<ColumnDescriptor*> m_wildcard_columns;
std::map<ColumnDescriptor*, std::set<int32_t>> m_wildcard_to_searched_basic_columns;
LiteralTypeBitmask m_wildcard_type_mask{0};
std::unordered_set<int32_t> m_internal_columns;
std::unordered_set<int32_t> m_metadata_columns;

std::stack<
std::pair<ExpressionType, OpList::iterator>,
Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp_s/search/SearchUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ LiteralType node_to_literal_type(NodeType type) {
return LiteralType::NullT;
case NodeType::DateString:
return LiteralType::EpochDateT;
case NodeType::Internal:
case NodeType::Metadata:
case NodeType::Unknown:
default:
return LiteralType::UnknownT;
Expand Down

0 comments on commit 2d1b76f

Please sign in to comment.