Skip to content

Commit

Permalink
Merge pull request #208 from tostegroo/improvement/sub-algorithm
Browse files Browse the repository at this point in the history
New sub/person substitution algorithm.
  • Loading branch information
kirsle authored Feb 23, 2017
2 parents 79206aa + 6a48e82 commit 9ac8704
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 28 deletions.
80 changes: 52 additions & 28 deletions src/brain.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -987,7 +987,6 @@ class Brain
# Run substitutions against a message. `type` is either "sub" or "person" for
# the type of substitution to run.
substitute: (msg, type) ->
result = ""

# Safety checking.
if not @master._sorted[type]
Expand All @@ -997,40 +996,65 @@ class Brain
# Get the substitutions map.
subs = if type is "sub" then @master._sub else @master._person

# Make placeholders each time we substitute something.
ph = []
pi = 0

for pattern in @master._sorted[type]
result = subs[pattern]
qm = utils.quotemeta pattern
# Get the max number of words in sub/person to minimize iterations
maxwords = if type is "sub" then @master._submax else @master._personmax

# Make a placeholder.
ph.push result
placeholder = "\x00<#{pi}>\x00"
pi++
result = ""

# Run substitutions.
msg = msg.replace(new RegExp("^#{qm}$", "g"), placeholder)
msg = msg.replace(new RegExp("^#{qm}([^a-zA-Z0-9_>]+)", "g"), "#{placeholder}$1")
msg = msg.replace(new RegExp("([^a-zA-Z0-9_<]+)#{qm}([^a-zA-Z0-9_>]+)", "g"), "$1#{placeholder}$2")
msg = msg.replace(new RegExp("([^a-zA-Z0-9_<]+)#{qm}$", "g"), "$1#{placeholder}")
# Take the original message with no punctuation
if @master.unicodePunctuation?
pattern = msg.replace(@master.unicodePunctuation, "")
else
pattern = msg.replace(/[.,!?;:]/g, "")

# Convert the placeholders back in.
tries = 0
while msg.indexOf("\x00") > -1
tries++
if tries > 50
@warn "Too many loops in substitution placeholders!"
giveup = 0
subgiveup = 0

# Look for words/phrases until there are no spaces left in the pattern
while pattern.indexOf(" ") > -1
giveup++
# Give up if there are too many substitutions (for safety)
if giveup >= 1000
@warn "Too many loops when handling substitutions!"
break

match = msg.match("\\x00<(.+?)>\\x00")
if match
cap = parseInt(match[1])
result = ph[cap]
msg = msg.replace(new RegExp("\x00<#{cap}>\x00", "g"), result)
li = utils.nIndexOf(pattern, " ", maxwords)
subpattern = pattern.substring(0, li)

return msg
# If the pattern is found in the sub object, replace it and stop looking
result = subs[subpattern];
if result!=undefined
msg = msg.replace(subpattern, result)
else
# Otherwise, look for substitutions in a subpattern
while subpattern.indexOf(" ") > -1
subgiveup++
# Give up if there are too many substitutions (for safety)
if subgiveup >= 1000
@warn("Too many loops when handling substitutions!")
break

li = subpattern.lastIndexOf(" ");
subpattern = subpattern.substring(0, li);

# If the subpattern is found in the sub object, replace it and stop looking
result = subs[subpattern];
if result!=undefined
msg = msg.replace(subpattern, result)
break

tries++;

fi = pattern.indexOf(" ")
pattern = pattern.substring(fi+1)
tries++

# After all loops, see if just one word is in the pattern
result = subs[pattern]
if result!=undefined
msg = msg.replace(pattern, result)

return msg

module.exports = Brain
6 changes: 6 additions & 0 deletions src/rivescript.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ class RiveScript
@_global = {} # 'global' variables
@_var = {} # 'bot' variables
@_sub = {} # 'sub' substitutions
@_submax = 1 # 'submax' max words in sub object
@_person = {} # 'person' substitutions
@_personmax= 1 # 'personmax' max words in person object
@_array = {} # 'array' variables
@_users = {} # 'user' variables
@_freeze = {} # frozen 'user' variables
Expand Down Expand Up @@ -463,6 +465,8 @@ class RiveScript
continue unless ast.begin.hasOwnProperty type
internal = "_#{type}" # so "global" maps to this._global
for name, value of vars
if type=='sub' || type=='person'
@[internal+"max"] = Math.max(@[internal+"max"], name.split(" ").length);
continue unless vars.hasOwnProperty name
if value is "<undef>"
delete @[internal][name]
Expand Down Expand Up @@ -706,6 +710,7 @@ class RiveScript
if value is undefined
delete @_sub[name]
else
@_submax = Math.max(name.split(' ').length, @_submax)
@_sub[name] = value

##
Expand All @@ -718,6 +723,7 @@ class RiveScript
if value is undefined
delete @_person[name]
else
@_personmax = Math.max(name.split(' ').length, @_personmax)
@_person[name] = value

##
Expand Down
16 changes: 16 additions & 0 deletions src/utils.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,19 @@ exports.isAPromise = (obj) ->
typeof obj.then is 'function' and
typeof obj.catch is 'function' and
typeof obj.finally is 'function'

##
# int nIndexOf (string, string match, int index)
#
# Returns the offset in `string` of the `index`-th occurrence of `match`
# (counting from 1). When `string` holds fewer than `index` occurrences,
# the full length of `string` is returned instead.
#
# Usage:
# nIndexOf("My name is Rive", " ", 2)  # => 7 (offset of the second space)
# nIndexOf("My name", " ", 5)          # => 7 (only one space, so the length)
##
exports.nIndexOf = (string, match, index) ->
  # Keep at most the first `index` pieces, glue them back together with the
  # separator, and measure the result: its length is the offset being sought.
  pieces = string.split match, index
  head = pieces.join match
  head.length

0 comments on commit 9ac8704

Please sign in to comment.