Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Update HDK to support config structure + use latest jit-engine #46

Merged
merged 2 commits into from
Aug 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ add_subdirectory(omniscidb/Logger)

add_subdirectory(omniscidb/Utils)
add_subdirectory(omniscidb/Calcite)
add_subdirectory(omniscidb/ConfigBuilder)
add_subdirectory(omniscidb/SchemaMgr)
add_subdirectory(omniscidb/StringDictionary)
add_subdirectory(omniscidb/L0Mgr)
Expand All @@ -163,7 +164,7 @@ if(BUILD_SHARED_LIBS AND ENABLE_PYTHON)
add_subdirectory(python)
endif()

install(TARGETS OSDependent Logger Shared Utils Calcite ArrowStorage StringDictionary DataMgr CudaMgr SchemaMgr L0Mgr QueryEngine Analyzer SqliteConnector RUNTIME)
install(TARGETS OSDependent Logger Shared Utils Calcite ArrowStorage StringDictionary DataMgr CudaMgr SchemaMgr L0Mgr QueryEngine Analyzer ConfigBuilder SqliteConnector RUNTIME)

add_executable(TestDriver apps/TestDriver.cpp)

Expand Down
2 changes: 1 addition & 1 deletion omniscidb
2 changes: 1 addition & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(pydeps
${PXD_SOURCES}
Calcite)

set(SETUP_LDFLAGS "-L$<TARGET_FILE_DIR:Calcite> -L$<TARGET_FILE_DIR:ArrowStorage> -L$<TARGET_FILE_DIR:QueryEngine> -L$<TARGET_FILE_DIR:SchemaMgr> -L$<TARGET_FILE_DIR:Logger> -L$<TARGET_FILE_DIR:Shared> -L$<TARGET_FILE_DIR:DataMgr>")
set(SETUP_LDFLAGS "-L$<TARGET_FILE_DIR:Calcite> -L$<TARGET_FILE_DIR:ArrowStorage> -L$<TARGET_FILE_DIR:QueryEngine> -L$<TARGET_FILE_DIR:SchemaMgr> -L$<TARGET_FILE_DIR:ConfigBuilder> -L$<TARGET_FILE_DIR:Logger> -L$<TARGET_FILE_DIR:Shared> -L$<TARGET_FILE_DIR:DataMgr>")
set(SETUP_FLAGS -g -f -I ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(pyhdk ALL
COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && LDFLAGS=${SETUP_LDFLAGS} ${Python3_EXECUTABLE} ${SETUP_PY} build_ext ${SETUP_FLAGS}
Expand Down
2 changes: 1 addition & 1 deletion python/pyhdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
prev = sys.getdlopenflags()
sys.setdlopenflags(os.RTLD_LAZY | os.RTLD_GLOBAL)

from pyhdk._common import TypeInfo, SQLType, setGlobalConfig, initLogger
from pyhdk._common import TypeInfo, SQLType, buildConfig, initLogger
from pyhdk._execute import Executor
import pyhdk.sql as sql
import pyhdk.storage as storage
Expand Down
149 changes: 143 additions & 6 deletions python/pyhdk/_common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from libcpp cimport bool
from libcpp.string cimport string
from libcpp.memory cimport shared_ptr

cdef extern from "omniscidb/Shared/sqltypes.h":
enum CSQLTypes "SQLTypes":
Expand Down Expand Up @@ -87,16 +88,152 @@ cdef extern from "omniscidb/Shared/SystemParameters.h":
CSystemParameters()

cdef extern from "omniscidb/ThriftHandler/CommandLineOptions.h":
cdef bool g_enable_columnar_output
cdef bool g_enable_union
cdef bool g_enable_lazy_fetch
cdef bool g_null_div_by_zero
cdef bool g_enable_watchdog
cdef bool g_enable_dynamic_watchdog
cdef bool g_enable_debug_timer

cdef extern from "omniscidb/Logger/Logger.h" namespace "logger":
cdef cppclass CLogOptions "logger::LogOptions":
CLogOptions(const char*)

cdef void CInitLogger "logger::init"(const CLogOptions &)

cdef extern from "omniscidb/Shared/Config.h":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any tools to generate this from plain C structures?

cdef cppclass CWatchdogConfig "WatchdogConfig":
bool enable
bool enable_dynamic
size_t time_limit
size_t baseline_max_groups
size_t parallel_top_max

cdef cppclass CCpuSubTasksConfig "CpuSubTasksConfig":
bool enable
size_t sub_task_size

cdef cppclass CJoinConfig "JoinConfig":
bool allow_loop_joins
unsigned trivial_loop_join_threshold
bool inner_join_fragment_skipping
size_t huge_join_hash_threshold
size_t huge_join_hash_min_load

cdef cppclass CGroupByConfig "GroupByConfig":
bool bigint_count
size_t default_max_groups_buffer_entry_guess
size_t big_group_threshold
bool use_groupby_buffer_desc
bool enable_gpu_smem_group_by
bool enable_gpu_smem_non_grouped_agg
bool enable_gpu_smem_grouped_non_count_agg
size_t gpu_smem_threshold
unsigned hll_precision_bits
size_t baseline_threshold

cdef cppclass CWindowFunctionsConfig "WindowFunctionsConfig":
bool enable
bool parallel_window_partition_compute
size_t parallel_window_partition_compute_threshold
bool parallel_window_partition_sort
size_t parallel_window_partition_sort_threshold

cdef cppclass CHeterogenousConfig "HeterogenousConfig":
bool enable_heterogeneous_execution
bool enable_multifrag_heterogeneous_execution
bool forced_heterogeneous_distribution
unsigned forced_cpu_proportion
unsigned forced_gpu_proportion
bool allow_cpu_retry
bool allow_query_step_cpu_retry

cdef cppclass CInterruptConfig "InterruptConfig":
bool enable_runtime_query_interrupt
bool enable_non_kernel_time_query_interrupt
double running_query_interrupt_freq

cdef cppclass CCodegenConfig "CodegenConfig":
bool inf_div_by_zero
bool null_div_by_zero
bool hoist_literals
bool enable_filter_function

cdef cppclass CExecutionConfig "ExecutionConfig":
CWatchdogConfig watchdog
CCpuSubTasksConfig sub_tasks
CJoinConfig join
CGroupByConfig group_by
CWindowFunctionsConfig window_func
CHeterogenousConfig heterogeneous
CInterruptConfig interrupt
CCodegenConfig codegen
size_t streaming_topn_max
size_t parallel_top_min
bool enable_experimental_string_functions
bool enable_interop
size_t parallel_linearization_threshold
bool enable_multifrag_rs

cdef cppclass CFilterPushdownConfig "FilterPushdownConfig":
bool enable
float low_frac
float high_frac
size_t passing_row_ubound

cdef cppclass COptimizationsConfig "OptimizationsConfig":
CFilterPushdownConfig filter_pushdown
bool from_table_reordering
bool strip_join_covered_quals
size_t constrained_by_in_threshold
bool skip_intermediate_count
bool enable_left_join_filter_hoisting

cdef cppclass CResultSetConfig "ResultSetConfig":
bool enable_columnar_output
bool optimize_row_initialization
bool enable_direct_columnarization
bool enable_lazy_fetch

cdef cppclass CGpuMemoryConfig "GpuMemoryConfig":
bool enable_bump_allocator
size_t min_memory_allocation_size
size_t max_memory_allocation_size
double bump_allocator_step_reduction
double input_mem_limit_percent

cdef cppclass CCpuMemoryConfig "CpuMemoryConfig":
bool enable_tiered_cpu_mem
size_t pmem_size

cdef cppclass CMemoryConfig "MemoryConfig":
CCpuMemoryConfig cpu
CGpuMemoryConfig gpu

cdef cppclass CCacheConfig "CacheConfig":
bool use_estimator_result_cache
bool enable_data_recycler
bool use_hashtable_cache
size_t hashtable_cache_total_bytes
size_t max_cacheable_hashtable_size_bytes
double gpu_fraction_code_cache_to_evict
size_t dag_cache_size
size_t code_cache_size

cdef cppclass CDebugConfig "DebugConfig":
string build_ra_cache
string use_ra_cache
bool enable_automatic_ir_metadata

cdef cppclass CConfig "Config":
CExecutionConfig exec
COptimizationsConfig opts
CResultSetConfig rs
CMemoryConfig mem
CCacheConfig cache
CDebugConfig debug

cdef class Config:
cdef shared_ptr[CConfig] c_config

cdef extern from "omniscidb/ConfigBuilder/ConfigBuilder.h":
cdef cppclass CConfigBuilder "ConfigBuilder":
CConfigBuilder()

bool parseCommandLineArgs(const string&, const string&, bool) except +
shared_ptr[CConfig] config()
23 changes: 15 additions & 8 deletions python/pyhdk/_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from libcpp.memory cimport unique_ptr, make_unique
from libcpp.memory cimport unique_ptr, make_unique, shared_ptr
from cython.operator cimport dereference

cdef class SQLType:
Expand Down Expand Up @@ -116,17 +116,24 @@ cdef class TypeInfo:
def __repr__(self):
return self.c_type_info.toString()

def setGlobalConfig(*, enable_union=None, null_div_by_zero=None, enable_debug_timer=None, **kwargs):
global g_enable_union
global g_null_div_by_zero
def buildConfig(*, enable_debug_timer=None, enable_union=False, **kwargs):
global g_enable_debug_timer
if enable_union is not None:
g_enable_union = enable_union
if null_div_by_zero is not None:
g_null_div_by_zero = null_div_by_zero
if enable_debug_timer is not None:
g_enable_debug_timer = enable_debug_timer

# Remove legacy params to provide better compatibility with PyOmniSciDbe
kwargs.pop("enable_union", None)
kwargs.pop("enable_thrift_logs", None)

cmd_str = "".join(' --%s %r' % arg for arg in kwargs.iteritems())
cmd_str = cmd_str.replace("_", "-")
cdef string app = "modin".encode('UTF-8')
cdef CConfigBuilder builder
builder.parseCommandLineArgs(app, cmd_str, False)
cdef Config config = Config()
config.c_config = builder.config()
return config

def initLogger(*, **kwargs):
argv0 = "PyHDK".encode('UTF-8')
cdef char *cargv0 = argv0
Expand Down
7 changes: 5 additions & 2 deletions python/pyhdk/_execute.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ from libcpp.utility cimport move

from pyarrow.lib cimport CTable as CArrowTable

from pyhdk._common cimport CSystemParameters, CSQLTypeInfo
from pyhdk._common cimport CSystemParameters, CSQLTypeInfo, CConfig
from pyhdk._storage cimport CDataMgr, CBufferProvider

cdef extern from "omniscidb/QueryEngine/CompilationOptions.h":
Expand Down Expand Up @@ -108,7 +108,10 @@ cdef extern from "omniscidb/QueryEngine/ArrowResultSet.h":
cdef extern from "omniscidb/QueryEngine/Execute.h":
cdef cppclass CExecutor "Executor":
@staticmethod
shared_ptr[CExecutor] getExecutor(size_t, CDataMgr*, CBufferProvider*, const string&, const string&, const CSystemParameters&)
shared_ptr[CExecutor] getExecutor(size_t, CDataMgr*, CBufferProvider*, shared_ptr[CConfig], const string&, const string&, const CSystemParameters&)

const CConfig &getConfig()
shared_ptr[CConfig] getConfigPtr()

cdef class Executor:
cdef shared_ptr[CExecutor] c_executor
5 changes: 3 additions & 2 deletions python/pyhdk/_execute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from pyhdk._common cimport Config
from pyhdk._storage cimport DataMgr

cdef class Executor:
def __cinit__(self, DataMgr data_mgr, int id = 0):
def __cinit__(self, DataMgr data_mgr, Config config, int id = 0):
cdef CSystemParameters params = CSystemParameters()
cdef string debug_dir = "".encode('UTF-8')
cdef string debug_file = "".encode('UTF-8')
cdef CBufferProvider *buffer_provider = data_mgr.c_data_mgr.get().getBufferProvider()
self.c_executor = CExecutor.getExecutor(id, data_mgr.c_data_mgr.get(), buffer_provider, debug_dir, debug_file, params)
self.c_executor = CExecutor.getExecutor(id, data_mgr.c_data_mgr.get(), buffer_provider, config.c_config, debug_dir, debug_file, params)
9 changes: 5 additions & 4 deletions python/pyhdk/_sql.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr
from libcpp.string cimport string
from libcpp.vector cimport vector

from pyhdk._common cimport CSystemParameters
from pyhdk._common cimport CSystemParameters, CConfig
from pyhdk._storage cimport CSchemaProviderPtr, CDataProvider, CDataMgr, CBufferProvider
from pyhdk._execute cimport CExecutor, CResultSetPtr, CCompilationOptions, CExecutionOptions, CTargetMetaInfo

Expand Down Expand Up @@ -51,8 +51,8 @@ cdef extern from "omniscidb/Calcite/CalciteJNI.h":
int input_next;

cdef cppclass CalciteJNI:
CalciteJNI(CSchemaProviderPtr, string, size_t);
string process(string, string, string, vector[FilterPushDownInfo], bool, bool, bool) except +
CalciteJNI(CSchemaProviderPtr, shared_ptr[CConfig], const string&, size_t);
string process(const string&, const string&, const vector[FilterPushDownInfo]&, bool, bool, bool) except +

string getExtensionFunctionWhitelist()
string getUserDefinedFunctionWhitelist()
Expand All @@ -63,7 +63,7 @@ cdef extern from "omniscidb/QueryEngine/RelAlgDagBuilder.h":
pass

cdef cppclass CRelAlgDagBuilder "RelAlgDagBuilder"(CRelAlgDag):
CRelAlgDagBuilder(const string&, int, CSchemaProviderPtr) except +
CRelAlgDagBuilder(const string&, int, CSchemaProviderPtr, shared_ptr[CConfig]) except +

cdef extern from "omniscidb/QueryEngine/Descriptors/RelAlgExecutionDescriptor.h":
cdef cppclass CExecutionResult "ExecutionResult":
Expand All @@ -80,3 +80,4 @@ cdef extern from "omniscidb/QueryEngine/RelAlgExecutor.h":
CRelAlgExecutor(CExecutor*, CSchemaProviderPtr, CDataProvider*, unique_ptr[CRelAlgDag])

CExecutionResult executeRelAlgQuery(const CCompilationOptions&, const CExecutionOptions&, const bool) except +
CExecutor *getExecutor()
29 changes: 11 additions & 18 deletions python/pyhdk/_sql.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,18 @@ from cython.operator cimport dereference, preincrement
from pyarrow.lib cimport pyarrow_wrap_table
from pyarrow.lib cimport CTable as CArrowTable

from pyhdk._common cimport g_enable_columnar_output
from pyhdk._common cimport g_enable_watchdog
from pyhdk._common cimport g_enable_dynamic_watchdog
from pyhdk._common cimport g_enable_lazy_fetch
from pyhdk._common cimport CConfig, Config
from pyhdk._storage cimport SchemaProvider, CDataMgr, DataMgr
from pyhdk._execute cimport Executor, CExecutorDeviceType, CArrowResultSetConverter

cdef class Calcite:
cdef shared_ptr[CalciteJNI] calcite

def __cinit__(self, SchemaProvider schema_provider, **kwargs):
def __cinit__(self, SchemaProvider schema_provider, Config config, **kwargs):
cdef string udf_filename = kwargs.get("udf_filename", "")
cdef size_t calcite_max_mem_mb = kwargs.get("calcite_max_mem_mb", 1024)

self.calcite = make_shared[CalciteJNI](schema_provider.c_schema_provider, udf_filename, calcite_max_mem_mb)
self.calcite = make_shared[CalciteJNI](schema_provider.c_schema_provider, config.c_config, udf_filename, calcite_max_mem_mb)

CExtensionFunctionsWhitelist.add(self.calcite.get().getExtensionFunctionWhitelist())
if not udf_filename.empty():
Expand All @@ -46,13 +43,12 @@ cdef class Calcite:
self.calcite.get().setRuntimeExtensionFunctions(udfs, udtfs, False)

def process(self, string sql, **kwargs):
cdef string user = kwargs.get("user", "admin")
cdef string db_name = kwargs.get("db_name", "test-db")
cdef vector[FilterPushDownInfo] filter_push_down_info = vector[FilterPushDownInfo]()
cdef bool legacy_syntax = kwargs.get("legacy_syntax", False)
cdef bool is_explain = kwargs.get("is_explain", False)
cdef bool is_view_optimize = kwargs.get("is_view_optimize", False)
return self.calcite.get().process(user, db_name, sql, filter_push_down_info, legacy_syntax, is_explain, is_view_optimize)
return self.calcite.get().process(db_name, sql, filter_push_down_info, legacy_syntax, is_explain, is_view_optimize)

cdef class ExecutionResult:
cdef CExecutionResult c_result
Expand Down Expand Up @@ -94,23 +90,20 @@ cdef class RelAlgExecutor:
if len(db_ids) == 1:
db_id = db_ids[0]

c_dag.reset(new CRelAlgDagBuilder(ra_json, db_id, c_schema_provider))
c_dag.reset(new CRelAlgDagBuilder(ra_json, db_id, c_schema_provider, c_executor.getConfigPtr()))

self.c_rel_alg_executor = make_shared[CRelAlgExecutor](c_executor, c_schema_provider, c_data_provider, move(c_dag))
self.c_data_mgr = data_mgr.c_data_mgr

def execute(self, **kwargs):
global g_enable_columnar_output
global g_enable_watchdog
global g_enable_dynamic_watchdog
global g_enable_lazy_fetch
cdef const CConfig *config = self.c_rel_alg_executor.get().getExecutor().getConfigPtr().get()
cdef CCompilationOptions c_co = CCompilationOptions.defaults(CExecutorDeviceType.CPU)
c_co.allow_lazy_fetch = kwargs.get("enable_lazy_fetch", g_enable_lazy_fetch)
c_co.with_dynamic_watchdog = kwargs.get("enable_dynamic_watchdog", g_enable_dynamic_watchdog)
c_co.allow_lazy_fetch = kwargs.get("enable_lazy_fetch", config.rs.enable_lazy_fetch)
c_co.with_dynamic_watchdog = kwargs.get("enable_dynamic_watchdog", config.exec.watchdog.enable_dynamic)
cdef CExecutionOptions c_eo = CExecutionOptions.defaults()
c_eo.output_columnar_hint = kwargs.get("enable_columnar_output", g_enable_columnar_output)
c_eo.with_watchdog = kwargs.get("enable_watchdog", g_enable_watchdog)
c_eo.with_dynamic_watchdog = kwargs.get("enable_dynamic_watchdog", g_enable_dynamic_watchdog)
c_eo.output_columnar_hint = kwargs.get("enable_columnar_output", config.rs.enable_columnar_output)
c_eo.with_watchdog = kwargs.get("enable_watchdog", config.exec.watchdog.enable)
c_eo.with_dynamic_watchdog = kwargs.get("enable_dynamic_watchdog", config.exec.watchdog.enable_dynamic)
c_eo.just_explain = kwargs.get("just_explain", False)
cdef CExecutionResult c_res = self.c_rel_alg_executor.get().executeRelAlgQuery(c_co, c_eo, False)
cdef ExecutionResult res = ExecutionResult()
Expand Down
Loading