Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Replace dictionary proxies with nested dictionaries 16/N #701

Merged
merged 4 commits into from
Oct 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions omniscidb/QueryEngine/Execute.cpp
Original file line number Diff line number Diff line change
@@ -461,7 +461,7 @@ StringDictionaryProxy* Executor::getStringDictionaryProxy(
return row_set_mem_owner->getOrAddStringDictProxy(dict_id_in, generation);
}

const StringDictionaryProxy::IdMap* Executor::getStringProxyTranslationMap(
const std::vector<int32_t>* Executor::getStringProxyTranslationMap(
const int source_dict_id,
const int dest_dict_id,
const RowSetMemoryOwner::StringTranslationType translation_type,
@@ -478,7 +478,7 @@ const StringDictionaryProxy::IdMap* Executor::getStringProxyTranslationMap(
source_dict_id, source_generation, dest_dict_id, dest_generation, translation_type);
}

const StringDictionaryProxy::IdMap* Executor::getIntersectionStringProxyTranslationMap(
const std::vector<int32_t>* Executor::getIntersectionStringProxyTranslationMap(
const StringDictionaryProxy* source_proxy,
const StringDictionaryProxy* dest_proxy,
std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) const {
@@ -489,7 +489,7 @@ const StringDictionaryProxy::IdMap* Executor::getIntersectionStringProxyTranslat
dest_proxy);
}

const StringDictionaryProxy::IdMap* RowSetMemoryOwner::getOrAddStringProxyTranslationMap(
const std::vector<int32_t>* RowSetMemoryOwner::getOrAddStringProxyTranslationMap(
const int source_dict_id_in,
const int64_t source_generation,
const int dest_dict_id_in,
4 changes: 2 additions & 2 deletions omniscidb/QueryEngine/Execute.h
Original file line number Diff line number Diff line change
@@ -304,14 +304,14 @@ class Executor : public StringDictionaryProxyProvider {
const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
const bool with_generation) const;

const StringDictionaryProxy::IdMap* getStringProxyTranslationMap(
const std::vector<int32_t>* getStringProxyTranslationMap(
const int source_dict_id,
const int dest_dict_id,
const RowSetMemoryOwner::StringTranslationType translation_type,
std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
const bool with_generation) const;

const StringDictionaryProxy::IdMap* getIntersectionStringProxyTranslationMap(
const std::vector<int32_t>* getIntersectionStringProxyTranslationMap(
const StringDictionaryProxy* source_proxy,
const StringDictionaryProxy* dest_proxy,
std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) const;
Original file line number Diff line number Diff line change
@@ -528,7 +528,7 @@ Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel(
}

StrProxyTranslationMapsPtrs decomposeStrDictTranslationMaps(
const std::vector<const StringDictionaryProxy::IdMap*>& str_proxy_translation_maps) {
const std::vector<const std::vector<int32_t>*>& str_proxy_translation_maps) {
StrProxyTranslationMapsPtrs translation_map_ptrs_and_offsets;
// First element of pair is vector of int32_t* pointing to translation map "vector"
// Second element of pair is vector of int32_t of min inner dictionary ids (offsets)
Original file line number Diff line number Diff line change
@@ -238,7 +238,7 @@ class BaselineJoinHashTable : public HashJoin {
ColumnCacheMap& column_cache_;
std::mutex cpu_hash_table_buff_mutex_;
std::mutex str_proxy_translation_mutex_;
std::vector<const StringDictionaryProxy::IdMap*> str_proxy_translation_maps_;
std::vector<const std::vector<int32_t>*> str_proxy_translation_maps_;

std::vector<InnerOuter> inner_outer_pairs_;
const int device_count_;
Original file line number Diff line number Diff line change
@@ -142,17 +142,16 @@ class PerfectJoinHashTableBuilder {
}
#endif

void initOneToOneHashTableOnCpu(
const JoinColumn& join_column,
const ExpressionRange& col_range,
const bool is_bitwise_eq,
const InnerOuter& cols,
const StringDictionaryProxy::IdMap* str_proxy_translation_map,
const JoinType join_type,
const HashType hash_type,
const HashEntryInfo hash_entry_info,
const int32_t hash_join_invalid_val,
const Executor* executor) {
void initOneToOneHashTableOnCpu(const JoinColumn& join_column,
const ExpressionRange& col_range,
const bool is_bitwise_eq,
const InnerOuter& cols,
const std::vector<int32_t>* str_proxy_translation_map,
const JoinType join_type,
const HashType hash_type,
const HashEntryInfo hash_entry_info,
const int32_t hash_join_invalid_val,
const Executor* executor) {
auto timer = DEBUG_TIMER(__func__);
const auto inner_col = cols.first;
CHECK(inner_col);
@@ -210,7 +209,7 @@ class PerfectJoinHashTableBuilder {
const ExpressionRange& col_range,
const bool is_bitwise_eq,
const std::pair<const hdk::ir::ColumnVar*, const hdk::ir::Expr*>& cols,
const StringDictionaryProxy::IdMap* str_proxy_translation_map,
const std::vector<int32_t>* str_proxy_translation_map,
const HashEntryInfo hash_entry_info,
const int32_t hash_join_invalid_val,
const Executor* executor) {
10 changes: 5 additions & 5 deletions omniscidb/QueryEngine/JoinHashTable/HashJoin.cpp
Original file line number Diff line number Diff line change
@@ -336,7 +336,7 @@ HashJoin::getStrDictProxies(const InnerOuter& cols, const Executor* executor) {
return inner_outer_str_dict_proxies;
}

const StringDictionaryProxy::IdMap* HashJoin::translateInnerToOuterStrDictProxies(
const std::vector<int32_t>* HashJoin::translateInnerToOuterStrDictProxies(
const InnerOuter& cols,
const Executor* executor) {
const auto inner_outer_proxies = HashJoin::getStrDictProxies(cols, executor);
@@ -418,14 +418,14 @@ CompositeKeyInfo HashJoin::getCompositeKeyInfo(
return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
}

std::vector<const StringDictionaryProxy::IdMap*>
HashJoin::translateCompositeStrDictProxies(const CompositeKeyInfo& composite_key_info,
const Executor* executor) {
std::vector<const std::vector<int32_t>*> HashJoin::translateCompositeStrDictProxies(
const CompositeKeyInfo& composite_key_info,
const Executor* executor) {
const auto& inner_proxies = composite_key_info.sd_inner_proxy_per_key;
const auto& outer_proxies = composite_key_info.sd_outer_proxy_per_key;
const size_t num_proxies = inner_proxies.size();
CHECK_EQ(num_proxies, outer_proxies.size());
std::vector<const StringDictionaryProxy::IdMap*> proxy_translation_maps;
std::vector<const std::vector<int32_t>*> proxy_translation_maps;
proxy_translation_maps.reserve(num_proxies);
for (size_t proxy_pair_idx = 0; proxy_pair_idx < num_proxies; ++proxy_pair_idx) {
const bool translate_proxies =
8 changes: 4 additions & 4 deletions omniscidb/QueryEngine/JoinHashTable/HashJoin.h
Original file line number Diff line number Diff line change
@@ -259,14 +259,14 @@ class HashJoin {
const std::vector<InnerOuter>& inner_outer_pairs,
const Executor* executor);

static std::vector<const StringDictionaryProxy::IdMap*>
translateCompositeStrDictProxies(const CompositeKeyInfo& composite_key_info,
const Executor* executor);
static std::vector<const std::vector<int32_t>*> translateCompositeStrDictProxies(
const CompositeKeyInfo& composite_key_info,
const Executor* executor);

static std::pair<const StringDictionaryProxy*, const StringDictionaryProxy*>
getStrDictProxies(const InnerOuter& cols, const Executor* executor);

static const StringDictionaryProxy::IdMap* translateInnerToOuterStrDictProxies(
static const std::vector<int32_t>* translateInnerToOuterStrDictProxies(
const InnerOuter& cols,
const Executor* executor);

2 changes: 1 addition & 1 deletion omniscidb/QueryEngine/JoinHashTable/PerfectJoinHashTable.h
Original file line number Diff line number Diff line change
@@ -252,7 +252,7 @@ class PerfectJoinHashTable : public HashJoin {
HashType hash_type_;
std::mutex cpu_hash_table_buff_mutex_;
std::mutex str_proxy_translation_mutex_;
const StringDictionaryProxy::IdMap* str_proxy_translation_map_{nullptr};
const std::vector<int32_t>* str_proxy_translation_map_{nullptr};
ExpressionRange col_range_;
ExpressionRange rhs_source_col_range_;
Executor* executor_;
2 changes: 1 addition & 1 deletion omniscidb/QueryEngine/StringDictionaryTranslationMgr.cpp
Original file line number Diff line number Diff line change
@@ -80,7 +80,7 @@ void StringDictionaryTranslationMgr::buildTranslationMap() {
void StringDictionaryTranslationMgr::createKernelBuffers() {
#ifdef HAVE_CUDA
if (memory_level_ == Data_Namespace::GPU_LEVEL) {
const size_t translation_map_size_bytes{host_translation_map_->getVectorMap().size() *
const size_t translation_map_size_bytes{host_translation_map_->size() *
sizeof(int32_t)};
for (int device_id = 0; device_id < device_count_; ++device_id) {
device_buffers_.emplace_back(GpuAllocator::allocGpuAbstractBuffer(
2 changes: 1 addition & 1 deletion omniscidb/QueryEngine/StringDictionaryTranslationMgr.h
Original file line number Diff line number Diff line change
@@ -77,7 +77,7 @@ class StringDictionaryTranslationMgr {
const int device_count_;
Executor* executor_;
Data_Namespace::DataMgr* data_mgr_;
const StringDictionaryProxy::IdMap* host_translation_map_{nullptr};
const std::vector<int32_t>* host_translation_map_{nullptr};
std::vector<const int32_t*> kernel_translation_maps_;
std::vector<Data_Namespace::AbstractBuffer*> device_buffers_;
};
4 changes: 2 additions & 2 deletions omniscidb/ResultSet/ResultSet.cpp
Original file line number Diff line number Diff line change
@@ -494,11 +494,11 @@ StringDictionaryProxy* ResultSet::getStringDictionaryProxy(int const dict_id) co
}

class ResultSet::CellCallback {
StringDictionaryProxy::IdMap const id_map_;
std::vector<int32_t> const id_map_;
int64_t const null_int_;

public:
CellCallback(StringDictionaryProxy::IdMap&& id_map, int64_t const null_int)
CellCallback(std::vector<int32_t>&& id_map, int64_t const null_int)
: id_map_(std::move(id_map)), null_int_(null_int) {}
void operator()(int8_t const* const cell_ptr) const {
using StringId = int32_t;
22 changes: 10 additions & 12 deletions omniscidb/ResultSet/RowSetMemoryOwner.h
Original file line number Diff line number Diff line change
@@ -193,24 +193,23 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
return it->second.get();
}

const StringDictionaryProxy::IdMap* addStringProxyIntersectionTranslationMap(
const std::vector<int32_t>* addStringProxyIntersectionTranslationMap(
const StringDictionaryProxy* source_proxy,
const StringDictionaryProxy* dest_proxy) {
std::lock_guard<std::mutex> lock(state_mutex_);
const auto map_key = std::make_pair(source_proxy->getBaseDictionary()->getDictId(),
dest_proxy->getBaseDictionary()->getDictId());
auto it = str_proxy_intersection_translation_maps_owned_.find(map_key);
if (it == str_proxy_intersection_translation_maps_owned_.end()) {
it = str_proxy_intersection_translation_maps_owned_
.emplace(
map_key,
source_proxy->buildIntersectionTranslationMapToOtherProxy(dest_proxy))
.first;
it =
str_proxy_intersection_translation_maps_owned_
.emplace(map_key, source_proxy->buildIntersectionTranslationMap(dest_proxy))
.first;
}
return &it->second;
}

const StringDictionaryProxy::IdMap* addStringProxyUnionTranslationMap(
const std::vector<int32_t>* addStringProxyUnionTranslationMap(
const StringDictionaryProxy* source_proxy,
StringDictionaryProxy* dest_proxy) {
std::lock_guard<std::mutex> lock(state_mutex_);
@@ -219,8 +218,7 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
auto it = str_proxy_union_translation_maps_owned_.find(map_key);
if (it == str_proxy_union_translation_maps_owned_.end()) {
it = str_proxy_union_translation_maps_owned_
.emplace(map_key,
source_proxy->buildUnionTranslationMapToOtherProxy(dest_proxy))
.emplace(map_key, source_proxy->buildUnionTranslationMap(dest_proxy))
.first;
}
return &it->second;
@@ -247,7 +245,7 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
return lit_str_dict_proxy_.get();
}

const StringDictionaryProxy::IdMap* getOrAddStringProxyTranslationMap(
const std::vector<int32_t>* getOrAddStringProxyTranslationMap(
const int source_dict_id_in,
const int64_t source_generation,
const int dest_dict_id_in,
@@ -293,9 +291,9 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
std::list<std::string> strings_;
std::list<std::vector<int64_t>> arrays_;
std::unordered_map<int, std::shared_ptr<StringDictionaryProxy>> str_dict_proxy_owned_;
std::map<std::pair<int, int>, StringDictionaryProxy::IdMap>
std::map<std::pair<int, int>, std::vector<int32_t>>
str_proxy_intersection_translation_maps_owned_;
std::map<std::pair<int, int>, StringDictionaryProxy::IdMap>
std::map<std::pair<int, int>, std::vector<int32_t>>
str_proxy_union_translation_maps_owned_;
std::shared_ptr<StringDictionaryProxy> lit_str_dict_proxy_;
std::vector<void*> col_buffers_;
Loading