Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Dwarf bench integration #187

Merged
merged 11 commits into from
Feb 17, 2023
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ add_definitions("-DENABLE_TBB")
add_definitions("-DHAVE_TBB")
add_definitions("-DTBB_PREVIEW_TASK_GROUP_EXTENSIONS=1")

# Dwarf Bench
# Optional integration, OFF by default. When switched on, the dbench package
# must be found by find_package (REQUIRED makes configuration fail otherwise),
# its location is reported, and the ENABLE_DWARF_BENCH preprocessor definition
# is added to the build.
option(ENABLE_DWARF_BENCH "Enable DwarfBench library for Cost Model" OFF)
if(ENABLE_DWARF_BENCH)
find_package(dbench REQUIRED)
message(STATUS "Dwarf Bench enabled ${dbench_DIR}")
add_definitions("-DENABLE_DWARF_BENCH")
endif()

option(ENABLE_L0 "Enable level zero support" OFF)
if(ENABLE_L0)
find_package(LevelZero REQUIRED COMPONENTS ${LevelZero_COMPONENTS})
Expand Down
10 changes: 9 additions & 1 deletion omniscidb/QueryEngine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,16 @@ set(query_engine_source_files

list(APPEND query_engine_source_files
CostModel/CostModel.cpp
CostModel/DataSources/DwarfBench.cpp
CostModel/DataSources/EmptyDataSource.cpp
CostModel/ExtrapolationModels/LinearExtrapolation.cpp
CostModel/DummyCostModel.cpp
CostModel/DataSources/DataSource.cpp
CostModel/Measurements.cpp)

if(ENABLE_DWARF_BENCH)
list(APPEND query_engine_source_files CostModel/DataSources/DwarfBench.cpp)
endif()

if(NOT MSVC)
list(APPEND query_engine_source_files ${CMAKE_CURRENT_BINARY_DIR}/gen-cpp/TableFunctionsFactory_init.cpp)
endif()
Expand Down Expand Up @@ -357,6 +360,11 @@ set(QUERY_ENGINE_LIBS

list(APPEND QUERY_ENGINE_LIBS ${llvm_libs} ${ZLIB_LIBRARIES})


if(ENABLE_DWARF_BENCH)
list(APPEND QUERY_ENGINE_LIBS dbench::dbench)
endif()

target_link_libraries(QueryEngine ${QUERY_ENGINE_LIBS})

set(TABLE_FUNCTIONS_DEPS "")
Expand Down
18 changes: 18 additions & 0 deletions omniscidb/QueryEngine/CostModel/DataSources/DataSource.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,22 @@ class DataSource {
std::string dataSourceName;
};

/// Base class for errors reported by cost-model data sources.
///
/// The text returned by what() is the supplied message prefixed with
/// "Data Source exception: ".
class DataSourceException : public std::runtime_error {
 public:
  /// @param msg Description of the failure; appended to the common prefix.
  ///
  /// explicit: a plain string must never convert silently into an exception.
  explicit DataSourceException(const std::string& msg)
      : std::runtime_error("Data Source exception: " + msg) {}
};

/// Data source error naming an analytical template that is not supported.
///
/// The message is "unsupported template: " followed by
/// templateToString(templ), on top of the DataSourceException prefix.
class UnsupportedAnalyticalTemplate : public DataSourceException {
 public:
  /// @param templ The template that was rejected.
  ///
  /// explicit: an AnalyticalTemplate must never convert silently into an
  /// exception object.
  explicit UnsupportedAnalyticalTemplate(AnalyticalTemplate templ)
      : DataSourceException("unsupported template: " + templateToString(templ)) {}
};

/// Data source error naming a device type that is not supported.
///
/// The message is "unsupported device: " followed by deviceToString(device),
/// on top of the DataSourceException prefix.
class UnsupportedDevice : public DataSourceException {
 public:
  /// @param device The device type that was rejected.
  ///
  /// explicit: an ExecutorDeviceType must never convert silently into an
  /// exception object.
  explicit UnsupportedDevice(ExecutorDeviceType device)
      : DataSourceException("unsupported device: " + deviceToString(device)) {}
};

} // namespace costmodel
175 changes: 56 additions & 119 deletions omniscidb/QueryEngine/CostModel/DataSources/DwarfBench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,147 +18,84 @@

namespace costmodel {

// Configures the base DataSource with the name "DwarfBench", with CPU and GPU
// as supported devices and with GroupBy, Join, Reduce and Scan as supported
// analytical templates.
DwarfBench::DwarfBench()
: DataSource(DataSourceConfig{
.dataSourceName = "DwarfBench",
.supportedDevices = {ExecutorDeviceType::CPU, ExecutorDeviceType::GPU},
.supportedTemplates = {AnalyticalTemplate::GroupBy,
AnalyticalTemplate::Join,
AnalyticalTemplate::Reduce,
AnalyticalTemplate::Scan}}) {}

// Names of the report CSV columns that are looked up in the header row:
// sizeHeader is parsed into Measurement::bytes and timeHeader into
// Measurement::milliseconds.
const std::string DwarfBench::sizeHeader = "buf_size_bytes";
const std::string DwarfBench::timeHeader = "total_time";

// Returns the value of the DWARF_BENCH_PATH environment variable.
//
// Throws DwarfBenchException when the variable is not set.
std::string DwarfBench::getDwarfBenchPath() {
  // The environment is queried on every call. The pointer returned by
  // std::getenv may be invalidated by later modifications of the environment,
  // so keeping it in a function-local static risks handing out a dangling
  // pointer.
  const char* dwarf_bench_path = std::getenv("DWARF_BENCH_PATH");

  if (dwarf_bench_path == nullptr) {
    throw DwarfBenchException("DWARF_BENCH_PATH environment variable not set");
  }

  // Copied into a std::string before returning, so callers never see the raw
  // environment pointer.
  return dwarf_bench_path;
}

Detail::DeviceMeasurements DwarfBench::getMeasurements(
// Configures the base DataSource with the name "DwarfBench". Only the CPU is
// declared as a supported device, and the supported analytical templates are
// GroupBy, Join and Scan (Reduce is not listed).
DwarfBenchDataSource::DwarfBenchDataSource()
: DataSource(DataSourceConfig{.dataSourceName = "DwarfBench",
.supportedDevices = {ExecutorDeviceType::CPU},
.supportedTemplates = {AnalyticalTemplate::GroupBy,
AnalyticalTemplate::Join,
AnalyticalTemplate::Scan}}) {}

Detail::DeviceMeasurements DwarfBenchDataSource::getMeasurements(
const std::vector<ExecutorDeviceType>& devices,
const std::vector<AnalyticalTemplate>& templates) {
Detail::DeviceMeasurements dm;
boost::filesystem::path dwarf_path = getDwarfBenchPath();

if (!boost::filesystem::exists(dwarf_path / "results")) {
boost::filesystem::create_directory(dwarf_path / "results");
}

for (AnalyticalTemplate templ : templates) {
CHECK(isTemplateSupported(templ));
for (ExecutorDeviceType device : devices) {
boost::filesystem::path reportFile = runDwarfAndGetReportFile(templ, device);
dm[device][templ] = parser.parseMeasurement(reportFile);
CHECK(isDeviceSupported(device));

dm[device][templ] = measureTemplateOnDevice(device, templ);
}
}

return dm;
}

// Runs <DWARF_BENCH_PATH>/scripts/run.py for one template/device pair and
// returns the path of the CSV report the script is asked to write:
// <DWARF_BENCH_PATH>/results/report_<template>.csv.
//
// Throws DwarfBenchException when DWARF_BENCH_PATH is not set or when the
// command does not finish with a zero status.
//
// TODO: more crossplatform (the command line relies on a POSIX-style shell
// redirect to /dev/null and on unquoted paths).
boost::filesystem::path DwarfBench::runDwarfAndGetReportFile(AnalyticalTemplate templ,
                                                             ExecutorDeviceType device) {
  const std::string dwarf_root = getDwarfBenchPath();
  const boost::filesystem::path dwarf_path = dwarf_root;
  const std::string deviceName = deviceToDwarfString(device);
  const std::string templateName = templateToDwarfString(templ);
  const boost::filesystem::path reportFile =
      dwarf_path / "results" / ("report_" + templateName + ".csv");

  const std::string scriptPath = dwarf_root + "/scripts/" + "run.py";
  const std::string executeLine = scriptPath + " --dwarf " + templateName +
                                  " --report_path " + reportFile.string() +
                                  " --device " + deviceName + " > /dev/null";

  // Without this check a failed run goes unnoticed, and a report left over
  // from an earlier run could be parsed as if it were fresh.
  const int status = system(executeLine.c_str());
  if (status != 0) {
    throw DwarfBenchException("command failed with status " + std::to_string(status) +
                              ": " + executeLine);
  }

  return reportFile;
}

// Reads a report CSV and returns its measurements sorted with
// Detail::BytesOrder.
//
// Throws DwarfBenchException when the file cannot be opened for reading.
std::vector<Detail::Measurement> DwarfBench::DwarfCsvParser::parseMeasurement(
    const boost::filesystem::path& csv) {
  // Reset the scratch buffers shared by the parsing helpers.
  entries.clear();
  line.clear();

  std::ifstream report(csv);
  if (!report.good()) {
    throw DwarfBenchException("No such report file: " + csv.string());
  }

  // The header row tells us which columns hold the size and the time.
  const CsvColumnIndexes columns = parseHeader(report);

  std::vector<Detail::Measurement> measurements = parseMeasurements(report, columns);
  std::sort(measurements.begin(), measurements.end(), Detail::BytesOrder());
  return measurements;
}

// Splits the current `line` on commas and builds one measurement from the
// size and time columns given by `indexes`.
//
// std::vector::at throws std::out_of_range when the row has too few columns;
// std::stoull throws when a field does not start with a number.
Detail::Measurement DwarfBench::DwarfCsvParser::parseLine(
    const CsvColumnIndexes& indexes) {
  entries.clear();
  boost::split(entries, line, boost::is_any_of(","));

  return Detail::Measurement{
      .bytes = std::stoull(entries.at(indexes.sizeIndex)),
      .milliseconds = std::stoull(entries.at(indexes.timeIndex))};
}

// Returns the zero-based position of `columnName` within the currently split
// `entries`.
//
// Throws DwarfBenchException when no entry equals `columnName`.
size_t DwarfBench::DwarfCsvParser::getCsvColumnIndex(const std::string& columnName) {
  const auto column = std::find(entries.begin(), entries.end(), columnName);

  if (column != entries.end()) {
    return column - entries.begin();
  }

  throw DwarfBenchException("No such column: " + columnName);
}

// Rewinds the stream, reads its first row and resolves the positions of the
// time and size columns in it.
//
// Throws DwarfBenchException (via getCsvColumnIndex) when either column is
// missing; the time column is looked up first.
DwarfBench::DwarfCsvParser::CsvColumnIndexes DwarfBench::DwarfCsvParser::parseHeader(
    std::ifstream& in) {
  in.seekg(0);
  std::getline(in, line);
  boost::split(entries, line, boost::is_any_of(","));

  const size_t timeColumn = getCsvColumnIndex(timeHeader);
  const size_t sizeColumn = getCsvColumnIndex(sizeHeader);

  return CsvColumnIndexes{.timeIndex = timeColumn, .sizeIndex = sizeColumn};
}

std::vector<Detail::Measurement> DwarfBench::DwarfCsvParser::parseMeasurements(
std::ifstream& in,
const CsvColumnIndexes& indexes) {
std::vector<Detail::Measurement> DwarfBenchDataSource::measureTemplateOnDevice(
ExecutorDeviceType device,
AnalyticalTemplate templ) {
std::vector<Detail::Measurement> ms;

while (std::getline(in, line)) {
entries.clear();
boost::split(entries, line, boost::is_any_of(","));

ms.push_back(parseLine(indexes));
for (size_t inputSize : dwarfBenchInputSizes) {
DwarfBench::RunConfig rc = {
.device = convertDeviceType(device),
.inputSize = inputSize,
.iterations = dwarfBenchIterations,
.dwarf = convertToDwarf(templ),
};

std::vector<Detail::Measurement> inputSizeMeasurements =
convertMeasurement(db.makeMeasurements(rc));

ms.insert(ms.end(), inputSizeMeasurements.begin(), inputSizeMeasurements.end());
}

return ms;
}

// Returns the device name passed to the run.py --device option: "cpu" for
// ExecutorDeviceType::CPU and "gpu" for every other value.
std::string DwarfBench::deviceToDwarfString(ExecutorDeviceType device) {
  if (device == ExecutorDeviceType::CPU) {
    return "cpu";
  }
  return "gpu";
}

std::string DwarfBench::templateToDwarfString(AnalyticalTemplate templ) {
DwarfBench::Dwarf DwarfBenchDataSource::convertToDwarf(AnalyticalTemplate templ) {
switch (templ) {
case AnalyticalTemplate::GroupBy:
return "groupby";
case AnalyticalTemplate::Join:
return "join";
return DwarfBench::Dwarf::GroupBy;
case AnalyticalTemplate::Scan:
return "scan";
return DwarfBench::Dwarf::DPLScan;
case AnalyticalTemplate::Join:
return DwarfBench::Dwarf::Join;
case AnalyticalTemplate::Reduce:
return "reduce";
default:
return "unknown";
throw UnsupportedAnalyticalTemplate(templ);
}
}

// Translates an executor device type into the matching DwarfBench device
// type.
//
// Throws UnsupportedDevice when `device` holds a value that none of the cases
// below covers.
DwarfBench::DeviceType DwarfBenchDataSource::convertDeviceType(
    ExecutorDeviceType device) {
  switch (device) {
    case ExecutorDeviceType::CPU:
      return DwarfBench::DeviceType::CPU;
    case ExecutorDeviceType::GPU:
      return DwarfBench::DeviceType::GPU;
  }

  // Deliberately placed after the switch instead of in a `default:` label, so
  // the compiler can still warn about enumerators missing from the switch.
  // Without this statement control would flow off the end of a value-returning
  // function, which is undefined behaviour.
  throw UnsupportedDevice(device);
}

// Converts DwarfBench measurements into cost-model measurements, keeping
// their order: dataSize becomes bytes, and microseconds divided by 1000
// becomes milliseconds.
std::vector<Detail::Measurement> DwarfBenchDataSource::convertMeasurement(
    const std::vector<DwarfBench::Measurement> measurements) {
  std::vector<Detail::Measurement> converted;

  for (const DwarfBench::Measurement& sample : measurements) {
    converted.push_back(Detail::Measurement{.bytes = sample.dataSize,
                                            .milliseconds = sample.microseconds / 1000});
  }

  return converted;
}

} // namespace costmodel
47 changes: 13 additions & 34 deletions omniscidb/QueryEngine/CostModel/DataSources/DwarfBench.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,55 +21,34 @@

#include "DataSource.h"

#include <bench.hpp>

namespace costmodel {

// Temporary implementation, used until DwarfBench provides a library
// that can be called directly.
class DwarfBench : public DataSource {
class DwarfBenchDataSource : public DataSource {
public:
DwarfBench();
DwarfBenchDataSource();

Detail::DeviceMeasurements getMeasurements(
const std::vector<ExecutorDeviceType>& devices,
const std::vector<AnalyticalTemplate>& templates) override;

private:
class DwarfCsvParser {
public:
std::vector<Detail::Measurement> parseMeasurement(const boost::filesystem::path& csv);

private:
struct CsvColumnIndexes {
size_t timeIndex;
size_t sizeIndex;
};
std::string line;
std::vector<std::string> entries;

size_t getCsvColumnIndex(const std::string& columnName);
CsvColumnIndexes parseHeader(std::ifstream& in);
Detail::Measurement parseLine(const CsvColumnIndexes& indexes);
std::vector<Detail::Measurement> parseMeasurements(std::ifstream& in,
const CsvColumnIndexes& indexes);
};
const size_t dwarfBenchIterations = 10;
const std::vector<size_t> dwarfBenchInputSizes = {256, 512, 1024, 2048};

DwarfCsvParser parser;
std::vector<Detail::Measurement> measureTemplateOnDevice(ExecutorDeviceType device,
AnalyticalTemplate templ);

boost::filesystem::path runDwarfAndGetReportFile(AnalyticalTemplate templ,
ExecutorDeviceType device);
DwarfBench::Dwarf convertToDwarf(AnalyticalTemplate templ);
DwarfBench::DeviceType convertDeviceType(ExecutorDeviceType device);

std::string deviceToDwarfString(ExecutorDeviceType device);
std::string templateToDwarfString(AnalyticalTemplate templ);
std::vector<Detail::Measurement> convertMeasurement(
const std::vector<DwarfBench::Measurement> measurements);

static const std::string sizeHeader;
static const std::string timeHeader;
static std::string getDwarfBenchPath();
};

class DwarfBenchException : public std::runtime_error {
public:
DwarfBenchException(const std::string& msg)
: std::runtime_error("DwarfBench data source exception: " + msg){};
DwarfBench::DwarfBench db;
};

} // namespace costmodel
9 changes: 6 additions & 3 deletions omniscidb/Tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,12 @@ if(ENABLE_L0)
add_test(DataMgrWithL0Test DataMgrWithL0Test ${TEST_ARGS})
endif()

if (ENABLE_COST_MODEL)
add_executable(CostModelTest CostModel/CostModelTest.cpp)
target_link_libraries(CostModelTest gtest)
add_executable(CostModelTest CostModel/CostModelTest.cpp)
target_link_libraries(CostModelTest gtest QueryEngine)

if(ENABLE_DWARF_BENCH)
add_executable(DwarfBenchIntegrationTest CostModel/DwarfBenchIntegrationTest.cpp)
target_link_libraries(DwarfBenchIntegrationTest gtest QueryEngine)
endif()

target_link_libraries(ResultSetTest gtest QueryEngine ArrowQueryRunner ArrowStorage)
Expand Down
18 changes: 0 additions & 18 deletions omniscidb/Tests/CostModel/CostModelTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include <gtest/gtest.h>

#include "QueryEngine/CostModel/DataSources/DataSource.h"
#include "QueryEngine/CostModel/DataSources/DwarfBench.h"
#include "QueryEngine/CostModel/ExtrapolationModels/LinearExtrapolation.h"

using namespace costmodel;
Expand Down Expand Up @@ -43,23 +42,6 @@ TEST(DataSourceTests, SupportCheckTest) {
ASSERT_FALSE(ds.isTemplateSupported(AnalyticalTemplate::Join));
}

// Verifies the name, devices and analytical templates that the DwarfBench
// data source reports as supported.
TEST(DataSourceTests, DwarfBenchSupportCheckTest) {
  DwarfBench db;
  ASSERT_EQ(db.getName(), "DwarfBench");

  for (ExecutorDeviceType device : {ExecutorDeviceType::CPU, ExecutorDeviceType::GPU}) {
    ASSERT_TRUE(db.isDeviceSupported(device));
  }

  for (AnalyticalTemplate templ : {AnalyticalTemplate::GroupBy,
                                   AnalyticalTemplate::Join,
                                   AnalyticalTemplate::Reduce,
                                   AnalyticalTemplate::Scan}) {
    ASSERT_TRUE(db.isTemplateSupported(templ));
  }
}

TEST(DataSourceTests, DwarfBenchGetMeasurements) {
// TODO
}

TEST(ExtrapolationModelsTests, LinearExtrapolationTest1) {
LinearExtrapolation le{{
{.bytes = 10, .milliseconds = 100},
Expand Down
Loading