-
Notifications
You must be signed in to change notification settings - Fork 164
/
TextExtraction.js
149 lines (127 loc) · 4.68 KB
/
TextExtraction.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
* If you want to provide a custom regexp, this is the configuration to use.
* -- For historical reasons, all regexps are processed as if they have the global flag set.
* -- Use the nonExhaustiveModeMaxMatchCount property to match a limited number of matches.
* Note: any additional keys/props are permitted, and will be returned as-is!
* @typedef {Object} CustomParseShape
* @property {RegExp} pattern
* @property {number} [nonExhaustiveModeMaxMatchCount] Enables "non-exhaustive mode", which limits how many matches are found for this pattern. -- Must be a positive integer; any other value (or omitting it) permits an unlimited number of matches
* @property {Function} [renderText] arbitrary function to rewrite the matched string into something else
* @property {Function} [onPress]
* @property {Function} [onLongPress]
*/
/**
 * Class to encapsulate the business logic of converting text into matches & props
 *
 * The text is split into an ordered list of "parts". Every pattern is applied
 * in turn to the parts that have not been matched by an earlier pattern, so a
 * piece of text is only ever claimed by one pattern.
 */
class TextExtraction {
  /**
   * @param {String} text - Text to be parsed
   * @param {CustomParseShape[]} patterns - Patterns to be used when parsed,
   *                                 any extra attributes, will be returned from parse()
   */
  constructor(text, patterns) {
    this.text = text;
    this.patterns = patterns || [];
  }

  /**
   * Returns parts of the text with their own props
   *
   * Parts whose `children` is falsy (e.g. an empty string) are dropped from
   * the result.
   * @public
   * @return {Object[]} - props for all the parts of the text
   */
  parse() {
    let parsedTexts = [{ children: this.text }];
    // partOffsets[i] is the position in this.text where the source text of
    // parsedTexts[i] starts. Kept in a parallel array (instead of on the
    // parts) because renderText may change the length of a part's children.
    let partOffsets = [0];

    this.patterns.forEach((pattern) => {
      const newParts = [];
      const newOffsets = [];

      // Only a positive integer limits the match count; anything else means
      // "no limit".
      const maxMatchCount = pattern.nonExhaustiveModeMaxMatchCount;
      const numberOfMatchesPermitted =
        Number.isInteger(maxMatchCount) && maxMatchCount > 0
          ? maxMatchCount
          : Number.POSITIVE_INFINITY;
      // Shared by all parts: the limit applies to the pattern as a whole
      let currentMatches = 0;

      parsedTexts.forEach((parsedText, partIndex) => {
        const partOffset = partOffsets[partIndex];

        // Only allow for now one parsing
        if (parsedText._matched) {
          newParts.push(parsedText);
          newOffsets.push(partOffset);
          return;
        }

        let textLeft = parsedText.children;
        // Position of textLeft[0] within this.text
        let textLeftOffset = partOffset;
        // Text stepped over after an empty match; it sits right before
        // textLeft and is re-attached to the next plain-text part
        let skippedText = '';
        /** @type {RegExpExecArray} */
        let matches;

        // Global RegExps are stateful, this makes it start at 0 if reused
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
        pattern.pattern.lastIndex = 0;

        while (textLeft && (matches = pattern.pattern.exec(textLeft))) {
          if (++currentMatches > numberOfMatchesPermitted) {
            // Abort if we've exhausted our number of matches
            break;
          }

          const matchedText = matches[0];
          const matchOffset = textLeftOffset + matches.index;

          newParts.push({
            children: skippedText + textLeft.slice(0, matches.index),
          });
          newOffsets.push(textLeftOffset - skippedText.length);

          newParts.push(
            this.getMatchedPart(pattern, matchedText, matches, matchOffset),
          );
          newOffsets.push(matchOffset);

          textLeft = textLeft.slice(matches.index + matchedText.length);
          textLeftOffset = matchOffset + matchedText.length;
          skippedText = '';

          if (matchedText.length === 0 && textLeft) {
            // An empty match consumes nothing, so the same match would be
            // found again forever. Step over one whole code point (never
            // half of a surrogate pair) to guarantee progress.
            const stepLength = textLeft.codePointAt(0) > 0xffff ? 2 : 1;
            skippedText = textLeft.slice(0, stepLength);
            textLeft = textLeft.slice(stepLength);
            textLeftOffset += stepLength;
          }

          // Global RegExps are stateful, this makes it operate on the "remainder" of the string
          pattern.pattern.lastIndex = 0;
        }

        // Whatever is left over stays plain text. textLeft is passed through
        // untouched when nothing was skipped, so non-string input is kept as-is.
        newParts.push({
          children: skippedText ? skippedText + textLeft : textLeft,
        });
        newOffsets.push(textLeftOffset - skippedText.length);
      });

      parsedTexts = newParts;
      partOffsets = newOffsets;
    });

    // Remove _matched key.
    parsedTexts.forEach((parsedText) => delete parsedText._matched);

    return parsedTexts.filter((t) => !!t.children);
  }

  // private

  /**
   * Builds the props for one matched piece of text.
   *
   * Every key of the pattern configuration except `pattern`, `renderText` and
   * `nonExhaustiveModeMaxMatchCount` is copied onto the result. Function
   * values are wrapped so that they are invoked with `(text, index)`.
   * @protected
   * @param {CustomParseShape} matchedPattern - pattern configuration of the pattern used to match the text
   * @param {String} text - Text matching the pattern
   * @param {RegExpExecArray} matches - Result of the RegExp.exec
   * @param {number} index - Index of the matched string in the whole string
   * @return {Object} props for the matched text
   */
  getMatchedPart(matchedPattern, text, matches, index) {
    const props = {};

    Object.keys(matchedPattern).forEach((key) => {
      if (
        key === 'pattern' ||
        key === 'renderText' ||
        key === 'nonExhaustiveModeMaxMatchCount'
      ) {
        return;
      }

      if (typeof matchedPattern[key] === 'function') {
        // Support onPress / onLongPress functions
        props[key] = () => matchedPattern[key](text, index);
      } else {
        // Set a prop with an arbitrary name to the value in the match-config
        props[key] = matchedPattern[key];
      }
    });

    // renderText, when provided, rewrites what is displayed for the match
    const children =
      typeof matchedPattern.renderText === 'function'
        ? matchedPattern.renderText(text, matches)
        : text;

    return {
      ...props,
      children,
      _matched: true,
    };
  }
}
export default TextExtraction;