Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add index(), col(), and vlookup() to ExprTk #2374

Merged
merged 4 commits into from
Oct 13, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
restore sentinel strings and unintern string args to col
timbess committed Oct 10, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit dbe54312fd289268218acbc11db2d456c9e6b512
2 changes: 0 additions & 2 deletions cpp/perspective/src/cpp/computed_expression.cpp
Original file line number Diff line number Diff line change
@@ -476,7 +476,6 @@ t_computed_function_store::t_computed_function_store(t_expression_vocab& vocab,
computed_function::replace(vocab, regex_mapping, is_type_validator))
, m_replace_all_fn(computed_function::replace_all(
vocab, regex_mapping, is_type_validator))
, m_add_one_fn(computed_function::add_one())
, m_index_fn(computed_function::index(pkey_map, source_table, row_idx))
, m_col_fn(computed_function::col(
vocab, is_type_validator, source_table, row_idx))
@@ -516,7 +515,6 @@ t_computed_function_store::register_computed_functions(
sym_table.add_function("month_of_year", m_month_of_year_fn);
sym_table.add_function("today", computed_function::today);
sym_table.add_function("now", computed_function::now);
sym_table.add_function("add_one", m_add_one_fn);

// String functions
sym_table.add_function("intern", m_intern_fn);
98 changes: 16 additions & 82 deletions cpp/perspective/src/cpp/computed_function.cpp
Original file line number Diff line number Diff line change
@@ -48,6 +48,12 @@ namespace computed_function {
std::string temp_str
= std::string(temp_string.begin(), temp_string.end());

if (m_is_type_validator) {
// Return the sentinel value which indicates a valid output from
// type checking, as the output value is not STATUS_CLEAR
return m_sentinel;
}

// Intern the string into the vocabulary.
rval.set(m_expression_vocab.intern(temp_str));
return rval;
@@ -2182,55 +2188,6 @@ namespace computed_function {
return rval;
}

add_one::add_one()
: exprtk::igeneric_function<t_tscalar>("T") {}
add_one::~add_one() {}

t_tscalar
add_one::operator()(t_parameter_list params) {
t_tscalar rval;
rval.clear();

t_generic_type& gt = params[0];
t_scalar_view temp(gt);
t_tscalar temp_scalar;

temp_scalar.set(temp());
t_dtype dtype = temp_scalar.get_dtype();

switch (dtype) {
case DTYPE_INT8:
case DTYPE_INT16:
case DTYPE_INT32:
rval.set(temp_scalar.to_int32() + 1);
break;
case DTYPE_INT64:
rval.set(temp_scalar.to_int64() + 1);
break;
case DTYPE_UINT8:
case DTYPE_UINT16:
case DTYPE_UINT32:
case DTYPE_UINT64:
rval.set(temp_scalar.to_uint64() + 1);
break;
case DTYPE_FLOAT32:
case DTYPE_FLOAT64:
rval.set(temp_scalar.to_double() + 1);
break;
case DTYPE_NONE:
rval.set(0.0);
default:
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!temp_scalar.is_valid()) {
return rval;
}

return rval;
}

index::index(const t_gstate::t_mapping& pkey_map,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("Z")
@@ -2254,7 +2211,7 @@ namespace computed_function {

col::col(t_expression_vocab& expression_vocab, bool is_type_validator,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("T")
: exprtk::igeneric_function<t_tscalar>("S")
, m_expression_vocab(expression_vocab)
, m_is_type_validator(is_type_validator)
, m_source_table(source_table)
@@ -2266,26 +2223,8 @@ namespace computed_function {
t_tscalar rval;
rval.clear();

t_generic_type& gt = parameters[0];
t_scalar_view temp(gt);
t_tscalar temp_scalar;

temp_scalar.set(temp());
t_dtype dtype = temp_scalar.get_dtype();

if (dtype != DTYPE_STR) {
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!temp_scalar.is_valid()) {
return rval;
}

std::string temp_str = temp_scalar.to_string();

// rval.set(m_expression_vocab.intern(temp_str));
// return rval;
t_string_view _colname(parameters[0]);
std::string temp_str = std::string(_colname.begin(), _colname.end());

if (!m_source_table->get_schema().has_column(temp_str)) {
rval.m_status = STATUS_CLEAR;
@@ -2302,7 +2241,7 @@ namespace computed_function {
vlookup::vlookup(t_expression_vocab& expression_vocab,
bool is_type_validator, std::shared_ptr<t_data_table> source_table,
t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("TT")
: exprtk::igeneric_function<t_tscalar>("ST")
, m_expression_vocab(expression_vocab)
, m_is_type_validator(is_type_validator)
, m_source_table(source_table)
@@ -2314,34 +2253,29 @@ namespace computed_function {
t_tscalar rval;
rval.clear();

t_generic_type& column_gt = parameters[0];
t_scalar_view column_gt_view(column_gt);
t_tscalar column_name;

column_name.set(column_gt_view());
t_dtype column_name_dtype = column_name.get_dtype();
t_string_view _colname(parameters[0]);
auto column_name = std::string(_colname.begin(), _colname.end());

t_generic_type& index_gt = parameters[1];
t_scalar_view index_gt_view(index_gt);
t_tscalar index;

index.set(index_gt_view());

if (column_name_dtype != DTYPE_STR || !index.is_numeric()) {
if (!index.is_numeric()) {
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!column_name.is_valid() || !index.is_valid()) {
if (!index.is_valid()) {
return rval;
}

std::string col_name_str = column_name.to_string();
if (!m_source_table->get_schema().has_column(col_name_str)) {
if (!m_source_table->get_schema().has_column(column_name)) {
rval.m_status = STATUS_CLEAR;
return rval;
}
auto col = m_source_table->get_const_column(col_name_str);
auto col = m_source_table->get_const_column(column_name);

if (m_is_type_validator) {
rval.m_status = STATUS_VALID;
Original file line number Diff line number Diff line change
@@ -223,7 +223,6 @@ struct PERSPECTIVE_EXPORT t_computed_function_store {
computed_function::substring m_substring_fn;
computed_function::replace m_replace_fn;
computed_function::replace_all m_replace_all_fn;
computed_function::add_one m_add_one_fn;
computed_function::index m_index_fn;
computed_function::col m_col_fn;
computed_function::vlookup m_vlookup_fn;
3 changes: 0 additions & 3 deletions cpp/perspective/src/include/perspective/computed_function.h
Original file line number Diff line number Diff line change
@@ -185,9 +185,6 @@ namespace computed_function {
// Length of the string
FUNCTION_HEADER(length)

// Add one to a column
FUNCTION_HEADER(add_one)

struct index : public exprtk::igeneric_function<t_tscalar> {
index(const t_pkey_mapping& pkey_map,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx);
11 changes: 11 additions & 0 deletions packages/perspective/src/js/perspective.js
Original file line number Diff line number Diff line change
@@ -1495,6 +1495,17 @@ export default function (Module) {
)}'${value}'${match.substring(intern_idx + intern_fn.length)}`;
};

parsed_expression_string = parsed_expression_string.replace(
/(col|vlookup)\((.*)\)/g,
(match, _, args, __) => {
const start = match.indexOf(args);
return `${match.substring(0, start)}${args.replace(
/intern\('([^']*)'\)/g,
"'$1'"
)})`;
}
);

// Replace intern() for bucket and regex functions that take
// a string literal parameter and does not work if that param is
// interned. TODO: this is clumsy and we should have a better
3 changes: 2 additions & 1 deletion python/perspective/perspective/src/view.cpp
Original file line number Diff line number Diff line change
@@ -211,7 +211,8 @@ namespace binding {
std::shared_ptr<t_computed_expression> expression
= t_computed_expression_parser::precompute(expression_alias,
expression_string, parsed_expression_string, column_ids,
schema, expression_vocab, regex_mapping);
gnode.get_table_sptr(), gnode.get_pkey_map(), schema,
expression_vocab, regex_mapping);

expressions.push_back(expression);
schema->add_column(expression_alias, expression->get_dtype());
5 changes: 5 additions & 0 deletions python/perspective/perspective/table/_utils.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,8 @@
EXPRESSION_COLUMN_NAME_REGEX = re.compile(r"\"(.*?[^\\])\"")
STRING_LITERAL_REGEX = re.compile(r"'(.*?[^\\])'")
FUNCTION_LITERAL_REGEX = re.compile(r"(bucket|match|match_all|search|indexof)\(.*?,\s*(intern\(\'(.+)\'\)).*\)")
MATCH_INTERNED_ARGS_REGEX = re.compile(r"intern\('([^']*)'\)")
UNINTERNED_FNS_REGEX = re.compile(r"(col|vlookup)\((.*)\)")
REPLACE_FN_REGEX = re.compile(r"(replace_all|replace)\(.*?,\s*(intern\(\'(.*)\'\)),.*\)")
BOOLEAN_LITERAL_REGEX = re.compile(r"([a-zA-Z_]+[a-zA-Z0-9_]*)")

@@ -197,6 +199,9 @@ def _parse_expression_strings(expressions):
parsed,
)

# remove _all_ `intern()` calls in these functions.
parsed = re.sub(UNINTERNED_FNS_REGEX, lambda match: re.sub(MATCH_INTERNED_ARGS_REGEX, r"'\1'", match.group(0)), parsed)

# remove the `intern()` in bucket and regex functions that take
# string literal parameters. TODO this logic should be centralized
# in C++ instead of being duplicated.
5 changes: 0 additions & 5 deletions rust/perspective-viewer/src/rust/exprtk/language.rs
Original file line number Diff line number Diff line change
@@ -395,11 +395,6 @@ thread_local! {
insert_text: "replace(${1:string}, ${2:pattern}, ${3:replacer})",
documentation: "Replaces all non-overlapping matches of pattern in string with replacer, or return the original string if no replaces were made.",
},
CompletionItemSuggestion {
label: "add_one",
insert_text: "add_one(${1:uint64})",
documentation: "Adds one to a number",
},
CompletionItemSuggestion {
label: "index",
insert_text: "index()",