Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 'weighted mean' aggregate support in <perspective-viewer> #1543

Merged
merged 20 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,6 @@ if (PSP_WASM_BUILD)
-s MODULARIZE=1 \
-s EXPORT_NAME=\"load_perspective\" \
-s MAXIMUM_MEMORY=4gb \
-s WASM_BIGINT=1 \
-s USE_ES6_IMPORT_META=0 \
-s EXPORTED_FUNCTIONS=\"['_main']\" \
")
Expand Down
246 changes: 142 additions & 104 deletions cpp/perspective/src/cpp/aggregate.cpp

Large diffs are not rendered by default.

35 changes: 21 additions & 14 deletions cpp/perspective/src/cpp/aggspec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ t_col_name_type::t_col_name_type(const std::string& name, t_dtype type)

t_aggspec::t_aggspec() {}

t_aggspec::t_aggspec(
const std::string& name, t_aggtype agg, const std::vector<t_dep>& dependencies)
t_aggspec::t_aggspec(const std::string& name, t_aggtype agg,
const std::vector<t_dep>& dependencies)
: m_name(name)
, m_disp_name(name)
, m_agg(agg)
, m_dependencies(dependencies) {}

t_aggspec::t_aggspec(const std::string& aggname, t_aggtype agg, const std::string& dep)
t_aggspec::t_aggspec(
const std::string& aggname, t_aggtype agg, const std::string& dep)
: m_name(aggname)
, m_disp_name(aggname)
, m_agg(agg)
Expand All @@ -40,23 +41,24 @@ t_aggspec::t_aggspec(t_aggtype agg, const std::string& dep)
: m_agg(agg)
, m_dependencies(std::vector<t_dep>{t_dep(dep, DEPTYPE_COLUMN)}) {}

t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name, t_aggtype agg,
const std::vector<t_dep>& dependencies)
t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name,
t_aggtype agg, const std::vector<t_dep>& dependencies)
: m_name(name)
, m_disp_name(disp_name)
, m_agg(agg)
, m_dependencies(dependencies) {}

t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name, t_aggtype agg,
const std::vector<t_dep>& dependencies, t_sorttype sort_type)
t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name,
t_aggtype agg, const std::vector<t_dep>& dependencies, t_sorttype sort_type)
: m_name(name)
, m_disp_name(disp_name)
, m_agg(agg)
, m_dependencies(dependencies)
, m_sort_type(sort_type) {}

t_aggspec::t_aggspec(const std::string& aggname, const std::string& disp_aggname, t_aggtype agg,
t_uindex agg_one_idx, t_uindex agg_two_idx, double agg_one_weight, double agg_two_weight)
t_aggspec::t_aggspec(const std::string& aggname,
const std::string& disp_aggname, t_aggtype agg, t_uindex agg_one_idx,
t_uindex agg_two_idx, double agg_one_weight, double agg_two_weight)
: m_name(aggname)
, m_disp_name(disp_aggname)
, m_agg(agg)
Expand Down Expand Up @@ -232,7 +234,9 @@ get_simple_accumulator_type(t_dtype coltype) {
return DTYPE_FLOAT64;
}

default: { PSP_COMPLAIN_AND_ABORT("Unexpected coltype"); }
default: {
PSP_COMPLAIN_AND_ABORT("Unexpected coltype");
}
}
return DTYPE_NONE;
}
Expand Down Expand Up @@ -271,7 +275,7 @@ std::vector<std::string>
t_aggspec::get_input_depnames() const {
std::vector<std::string> rval;
rval.reserve(m_dependencies.size());
for (const auto & d : m_dependencies) {
for (const auto& d : m_dependencies) {
rval.push_back(d.name());
}
return rval;
Expand All @@ -281,7 +285,7 @@ std::vector<std::string>
t_aggspec::get_output_depnames() const {
std::vector<std::string> rval;
rval.reserve(m_dependencies.size());
for (const auto & d: m_dependencies) {
for (const auto& d : m_dependencies) {
rval.push_back(d.name());
}
return rval;
Expand All @@ -298,7 +302,8 @@ t_aggspec::get_output_specs(const t_schema& schema) const {
case AGGTYPE_MUL:
case AGGTYPE_SUM_NOT_NULL: {
t_dtype coltype = schema.get_dtype(m_dependencies[0].name());
return mk_col_name_type_vec(name(), get_simple_accumulator_type(coltype));
return mk_col_name_type_vec(
name(), get_simple_accumulator_type(coltype));
}
case AGGTYPE_ANY:
case AGGTYPE_UNIQUE:
Expand Down Expand Up @@ -352,7 +357,9 @@ t_aggspec::get_output_specs(const t_schema& schema) const {
case AGGTYPE_DISTINCT_COUNT: {
return mk_col_name_type_vec(name(), DTYPE_UINT32);
}
default: { PSP_COMPLAIN_AND_ABORT("Unknown agg type"); }
default: {
PSP_COMPLAIN_AND_ABORT("Unknown agg type");
}
}

return std::vector<t_col_name_type>();
Expand Down
4 changes: 2 additions & 2 deletions cpp/perspective/src/cpp/arg_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ t_argsort_comparator::operator()(t_index a, t_index b) const {
}

void
simple_argsort(
std::vector<t_tscalar>& v, std::vector<t_index>& output, const t_sorttype& sort_type) {
simple_argsort(std::vector<t_tscalar>& v, std::vector<t_index>& output,
const t_sorttype& sort_type) {
// Output should be the same size is v
for (t_index i = 0, loop_end = output.size(); i != loop_end; ++i)
output[i] = i;
Expand Down
52 changes: 32 additions & 20 deletions cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
#include <arrow/io/memory.h>

#ifdef PSP_ENABLE_WASM
// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>
// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>
#else
#include <arrow/csv/reader.h>
#include <arrow/csv/reader.h>
#endif

namespace perspective {
Expand All @@ -30,7 +30,8 @@ namespace apachearrow {
int64_t* out) const override {
size_t endptr;
std::string val(s, s + length);
int64_t value = std::stoll(static_cast<std::string>(val), &endptr, 10);
int64_t value
= std::stoll(static_cast<std::string>(val), &endptr, 10);
if (endptr != length) {
return false;
} else {
Expand All @@ -51,7 +52,8 @@ namespace apachearrow {
if (ARROW_PREDICT_FALSE(s[0] != '.')) {
return false;
}
if (ARROW_PREDICT_FALSE(!arrow::internal::ParseUnsigned(s + 1, 3, &millis))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::ParseUnsigned(s + 1, 3, &millis))) {
return false;
}

Expand All @@ -66,10 +68,12 @@ namespace apachearrow {
ParseTZ(const char* s, std::chrono::hours* out) {
uint8_t hours = 0;

if (ARROW_PREDICT_FALSE(s[0] != '+') && ARROW_PREDICT_FALSE(s[0] != '-')) {
if (ARROW_PREDICT_FALSE(s[0] != '+')
&& ARROW_PREDICT_FALSE(s[0] != '-')) {
return false;
}
if (ARROW_PREDICT_FALSE(!arrow::internal::ParseUnsigned(s + 1, 2, &hours))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::ParseUnsigned(s + 1, 2, &hours))) {
return false;
}

Expand Down Expand Up @@ -97,12 +101,14 @@ namespace apachearrow {
// "YYYY-MM-DD[ T]hh:mm:ss.sss"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(s, &ymd))) {
!arrow::internal::detail::ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
std::chrono::seconds seconds;
if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
return false;
}
std::chrono::milliseconds millis;
Expand All @@ -111,18 +117,21 @@ namespace apachearrow {
}

*out = arrow::internal::detail::ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + seconds + millis, unit);
arrow_vendored::date::sys_days(ymd) + seconds + millis,
unit);
return true;
} else if (length == 25) {
// "2008-09-15[ T]15:53:00+05:00"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(s, &ymd))) {
!arrow::internal::detail::ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
std::chrono::seconds seconds;
if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
return false;
}
std::chrono::hours tz;
Expand All @@ -131,7 +140,8 @@ namespace apachearrow {
}

*out = arrow::internal::detail::ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + tz + seconds, unit);
arrow_vendored::date::sys_days(ymd) + tz + seconds,
unit);
return true;
}
return false;
Expand All @@ -148,7 +158,8 @@ namespace apachearrow {
std::vector<std::shared_ptr<arrow::TimestampParser>> DATE_PARSERS{
std::make_shared<CustomISO8601Parser>(),
arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime(
"%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
arrow::TimestampParser::MakeStrptime("%d %m %Y"),
Expand All @@ -159,7 +170,8 @@ namespace apachearrow {
std::make_shared<UnixTimestampParser>(),
std::make_shared<CustomISO8601Parser>(),
arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime(
"%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
arrow::TimestampParser::MakeStrptime("%d %m %Y"),
Expand All @@ -169,8 +181,8 @@ namespace apachearrow {
parseAsArrowTimestamp(const std::string& input) {
for (auto candidate : DATE_PARSERS) {
int64_t datetime;
if (candidate->operator()(
input.c_str(), input.size(), arrow::TimeUnit::MILLI, &datetime)) {
if (candidate->operator()(input.c_str(), input.size(),
arrow::TimeUnit::MILLI, &datetime)) {
return datetime;
}
}
Expand Down
Loading