Skip to content

Commit

Permalink
feat(config): enable/disable prefix matching numerals feature via con…
Browse files Browse the repository at this point in the history
…fig flag (pelias#1596)
  • Loading branch information
missinglink authored Feb 1, 2022
1 parent 385687a commit a5aabc2
Showing 1 changed file with 33 additions and 7 deletions.
40 changes: 33 additions & 7 deletions sanitizer/_tokenizer.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,33 @@
const _ = require('lodash');
const config = require('pelias-config').generate();

// Feature flags, read from the pelias config once when this module loads.
// 'api.feature.prefix_match_numerals' is looked up with `true` passed as
// the default; setting it to false disables prefix matching of numerals.
const feature = {
  prefix_match_numerals: config.get('api.feature.prefix_match_numerals', true)
};

/**
 * Decide whether a numeric final token should be treated as complete,
 * ie. whether prefix matching should be skipped for it.
 *
 * Only meaningful when the final token of the input is a numeral; the
 * caller is responsible for checking that.
 *
 * @param {object} clean - the request object; only `clean.layers` is read.
 * @returns {boolean} true when the numeric token should be considered
 *   complete, false when it may still be prefix matched.
 */
const shouldMarkNumericFinalTokenAsComplete = (clean) => {

  // the feature can be switched off via config, in which case a
  // numeric suffix is always considered complete.
  if (feature.prefix_match_numerals === false) {
    return true;
  }

  // layers requested by the user (if any)
  const requestedLayers = _.get(clean, 'layers', []);
  const hasExplicitLayers = _.isArray(requestedLayers) && !_.isEmpty(requestedLayers);

  // no layers were explicitly specified, so the query will also cover
  // addresses: avoid prefix matching for the same reason as below.
  if (!hasExplicitLayers) {
    return true;
  }

  // the address layer was explicitly requested: avoid prefix matching
  // house number numerals. For any other combination of layers the
  // default behaviour is to allow prefix matching numerals.
  return requestedLayers.includes('address');
};

/**
simplified version of the elasticsearch tokenizer, used in order to
Expand Down Expand Up @@ -46,13 +75,10 @@ function _sanitize( raw, clean ){
}
}

// if requesting the address layer AND final character is a numeral then consider
// all tokens as complete in order to avoid prefix matching numerals.
const layers = _.get(clean, 'layers', []);
if (!_.isArray(layers) || _.isEmpty(layers) || layers.includes('address')) {
if (/[0-9]$/.test(text)) {
parserConsumedAllTokens = true;
}
// if the final character is a numeral then consider all tokens
// as complete in order to avoid prefix matching numerals.
if (/[0-9]$/.test(text) && shouldMarkNumericFinalTokenAsComplete(clean)) {
parserConsumedAllTokens = true;
}

// always set 'clean.tokens*' arrays for consistency and to avoid upstream errors.
Expand Down

0 comments on commit a5aabc2

Please sign in to comment.