Skip to content

Commit

Permalink
🐛 Limit the repeated digit group in NUMBERED_REFERENCE_REGEX to zero or one occurrence
Browse files Browse the repository at this point in the history
Fixes #58
  • Loading branch information
nipunsadvilkar committed May 29, 2020
1 parent 833062f commit 382d04b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pysbd/lang/common/numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Common(object):

# https://rubular.com/r/UkumQaILKbkeyc
# https://github.com/diasks2/pragmatic_segmenter/commit/d9ec1a352aff92b91e2e572c30bb9561eb42c703
NUMBERED_REFERENCE_REGEX = r'(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)*\d{1,3}))(\s)(?=[A-Z])'
NUMBERED_REFERENCE_REGEX = r'(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)?\d{1,3}))(\s)(?=[A-Z])'

# # Rubular: http://rubular.com/r/yqa4Rit8EY
PossessiveAbbreviationRule = Rule(r"\.(?='s\s)|\.(?='s$)|\.(?='s\Z)", '∯')
Expand Down
4 changes: 3 additions & 1 deletion tests/regression/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
['As an example of a different special-purpose mechanism, we have introduced a methodology for letting donors make their donations to charities conditional on donations by other donors (who, in turn, can make their donations conditional) [70].', 'We have used this mechanism to collect money for Indian Ocean Tsunami and Hurricane Katrina victims.', "We have also introduced a more general framework for negotiation when one agent's actions have a direct effect (externality) on the other agents' utilities [69].", 'Both the charities and externalities methodologies require the solution of NP-hard optimization problems in general, but there are some natural tractable cases as well as effective MIP formulations.', 'Recently, Ghosh and Mahdian [86] at Yahoo! Research extended our charities work, and based on this a web-based system for charitable donations was built at Yahoo!']),
('#39', "T stands for the vector transposition. As shown in Fig. ??",
["T stands for the vector transposition.", "As shown in Fig. ??"]),
('#39', 'Fig. ??', ['Fig. ??'])
('#39', 'Fig. ??', ['Fig. ??']),
('#58', 'Rok bud.2027777983834843834843042003200220012000199919981997199619951994199319921991199019891988198042003200220012000199919981997199619951994199319921991199019891988198',
['Rok bud.2027777983834843834843042003200220012000199919981997199619951994199319921991199019891988198042003200220012000199919981997199619951994199319921991199019891988198'])
]

TEST_ISSUE_DATA_CHAR_SPANS = [
Expand Down

0 comments on commit 382d04b

Please sign in to comment.