Skip to content

Commit

Permalink
[#4] Fix clobbered boolean rules
Browse files Browse the repository at this point in the history
This requires sharing the BooleanInputParser among all the rules within the
rules-rewriter and adding the input literals all at once at the end.
  • Loading branch information
jvia committed Jan 11, 2024
1 parent 550b126 commit 76ed2b8
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 237 deletions.
54 changes: 1 addition & 53 deletions dev/user.clj
Original file line number Diff line number Diff line change
@@ -1,53 +1 @@
(ns user
(:require
[com.nytimes.querqy :as q]
[com.nytimes.querqy.commonrules :as c]))

(defn pin
"Pin the given IDs to the top of the result set in the order given. Should only
be used once within a given match rule."
[& ids]
(map-indexed
(fn [idx id]
(c/boost (- Float/MAX_VALUE idx) {:ids {:values [id]}}))
ids))

;; Let's assume we have some special thanksgiving related content that editorial
;; wants highly promoted, the documents with IDs 12345 and 5678. Rather than
;; tweak scores or sprinkle boosts around our rule set, we can instead use our
;; new pin rule which makes clear the intent of the boost and serves as
;; documentation for what we're trying to achieve with this rule.
(def rules
(c/rules-rewriter
(c/match "thanksgiving"
(pin "12345" "5678"))))

;; We can now emit a query which pins results to the top.

(def opts {:match/fields ["headline"]})

(q/emit (q/rewrite rules "thanksgiving recipes") opts)

{:function_score
{:query
{:bool
{:must [],
:should
[{:match {"headline" {:query "thanksgiving"}}}
{:match {"headline" {:query "recipes"}}}],
:must_not [],
:filter []}},
:functions
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}}

(def base
{:function_score
{:query
{:bool {:must [],
:should [],
:must_not [],
:filter []}},
:functions
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}})
(ns user)
185 changes: 99 additions & 86 deletions src/com/nytimes/querqy/commonrules.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@
"CommonRules based rewriter"
(:refer-clojure :exclude [filter])
(:require
[clojure.java.io :as io]
[clojure.string :as str]
[com.nytimes.querqy.model :as model]
[com.nytimes.querqy.parser :as parser])
[clojure.java.io :as io]
[clojure.string :as str]
[com.nytimes.querqy.model :as model]
[com.nytimes.querqy.parser :as parser])
(:import
(java.io Reader)
(java.net URL)
(java.util List UUID)
(querqy.model Input Input$BooleanInput Input$SimpleInput)
(querqy.parser QuerqyParser)
(querqy.rewrite RewriterFactory)
(querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory)
(querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder)
(querqy.rewrite.commonrules.select SelectionStrategyFactory)
(querqy.rewrite.commonrules.select.booleaninput BooleanInputParser)
(querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral)))
(java.io Reader)
(java.net URL)
(java.util List UUID)
(querqy.model Input Input$BooleanInput Input$SimpleInput)
(querqy.parser QuerqyParser)
(querqy.rewrite RewriterFactory)
(querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory)
(querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder)
(querqy.rewrite.commonrules.select SelectionStrategyFactory)
(querqy.rewrite.commonrules.select.booleaninput BooleanInputParser)
(querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral)))

(set! *warn-on-reflection* true)

(defprotocol CommonRulesRewriterBuilder
(common-rules-rewriter* [this]))
Expand All @@ -27,12 +29,6 @@
(common-rules-rewriter* [_]
(throw (IllegalArgumentException. "Must provide rules to rules-rewriter"))))

(defn- flatten-rules
[fns]
(reduce (fn [accm afn] (if (coll? afn) (into accm afn) (conj accm afn)))
[]
fns))

(defn rules-rewriter
"Create a CommonRulesRewriter.
Expand All @@ -41,15 +37,15 @@
[& args]
(if (and (= 1 (count args)) (instance? URL (first args)))
(common-rules-rewriter* (first args))
(common-rules-rewriter* (flatten-rules args))))
(common-rules-rewriter* args)))

(defn rewriter-factory
[rules]
(proxy [RewriterFactory] [(str (UUID/randomUUID))]
(createRewriter [_ _]
(CommonRulesRewriter.
rules
SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY))
rules
SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY))
(getCacheableGenerableTerms [] #{})))

;; ----------------------------------------------------------------------
Expand All @@ -63,10 +59,10 @@
ignore-case true
parser (WhiteSpaceQuerqyParserFactory.)}}]
(let [rules-parser (SimpleCommonRulesParser.
^Reader stream
^boolean boolean-input
^QuerqyParserFactory parser
^boolean ignore-case)]
^Reader stream
^boolean boolean-input
^QuerqyParserFactory parser
^boolean ignore-case)]
(.parse rules-parser))))

(extend-protocol CommonRulesRewriterBuilder
Expand All @@ -77,36 +73,51 @@
;; ----------------------------------------------------------------------
;; DSL

(extend-protocol CommonRulesRewriterBuilder
List
(common-rules-rewriter* [rules]
(let [rules-builder (TrieMapRulesCollectionBuilder. true)]
(doseq [rule-fn rules]
(rule-fn rules-builder))
(rewriter-factory (.build rules-builder)))))

(def ^:dynamic ^QuerqyParser *query-parser* parser/whitespace-parser)

(declare match*)
(defrecord Rule [input instructions])

(defn match*
"Create a "
[head & tail]
(->Rule head (vec tail)))

(defmacro match
"Create a match rule."
{:style/indent 1}
;; TODO LEFT/ RIGHT boundaries
"Create a rewriter rule from matching text input or boolean input followed by
any number of query transformations.
```clojure
;; Inject bar as a synonym to foo in any query.
(match \"foo\"
(synonym \"bar\"))
```"
[head & tail]
`(match* '~head (vec (flatten (vector ~@tail)))))
`(match* '~head ~@tail))

(defn- parse-string
[string]
(mapv #(LineParser/parseTerm %) (str/split string #"\s+")))

(defn- parse-string [string] (mapv #(LineParser/parseTerm %) (str/split string #"\s+")))
(defn- parse-query
[query]
(cond
(string? query)
(.parse *query-parser* query)

(defn- parse-query [query]
(cond (string? query) (.parse *query-parser* query)
(map? query) (model/rawq {:query query})))
(map? query)
(model/rawq {:query query})))

(defn delete? [obj] (instance? DeleteInstruction obj))
(defn delete?
[obj]
(instance? DeleteInstruction obj))

(defn delete [string] (DeleteInstruction. (parse-string string)))
(defn delete
[string]
(DeleteInstruction. (parse-string string)))

(defn synonym? [obj] (instance? SynonymInstruction obj))
(defn synonym?
[obj]
(instance? SynonymInstruction obj))

(defn synonym
"Create a synonym instruction."
Expand Down Expand Up @@ -154,41 +165,43 @@
not (wrap (cons NOT (parse-boolean-input terms)))
(map parse-boolean-input input))))

;;

(def rule-count (atom 0))

(defn ^:no-doc match*
;; implements match for use by match macro
[input instructions]
(let [ord (swap! rule-count inc)
compiled (Instructions. ord ord instructions)]
(fn [^TrieMapRulesCollectionBuilder rules-builder]
(cond
;; string rules
(string? input)
(let [simple-input (Input/parseSimpleInput input)]
(.addRule rules-builder
^Input$SimpleInput simple-input
^Instructions compiled))

;; boolean rules
(list? input)
(do
(when (some delete? instructions)
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))
(when (some synonym? instructions)
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))
(let [boolean-input-parser (BooleanInputParser.)
bool-input (Input$BooleanInput. (parse-boolean-input input)
boolean-input-parser
(pr-str input))]

(.applyInstructions bool-input compiled rules-builder)
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
(let [input (LineParser/parseInput (str/join \space (.getTerms literal)))]
(.addRule rules-builder
^Input$SimpleInput input
^BooleanInputLiteral literal)))))

:else (throw (IllegalArgumentException. "Can only parse a string or list as input"))))))
(defn parse-simple-input
^Input$SimpleInput
[string]
(Input/parseSimpleInput string))

(extend-protocol CommonRulesRewriterBuilder
List
(common-rules-rewriter* [rules]
(let [rules (flatten rules)
counter (atom 0)
rules-builder (TrieMapRulesCollectionBuilder. true)
boolean-input-parser (BooleanInputParser.)]
(doseq [{:keys [input instructions]} rules]
(let [ord (swap! counter inc)
id (str input "#" ord)
instructions (Instructions. ord id instructions)]
(cond
(string? input)
(.addRule rules-builder (parse-simple-input input) instructions)

(list? input)
(do
(when (some delete? instructions)
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))

(when (some synonym? instructions)
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))

(let [bool-input (Input$BooleanInput. (parse-boolean-input input) boolean-input-parser (pr-str input))]
;; inputPattern.applyInstructions(instructions, builder);
(.applyInstructions bool-input instructions rules-builder))))))

;; Add boolean literals at the end
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
(let [string (str/join \space (.getTerms literal))
input (parse-simple-input string)]
(.addRule rules-builder input literal)))

;;
(rewriter-factory (.build rules-builder)))))
10 changes: 5 additions & 5 deletions src/com/nytimes/querqy/model.clj
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
(ns com.nytimes.querqy.model
"Builders for classes in the `querqy.model` package."
(:require
[clojure.core.protocols :as cp]
[clojure.datafy :refer [datafy]]
[clojure.string :as str])
[clojure.core.protocols :as cp]
[clojure.datafy :refer [datafy]]
[clojure.string :as str])
(:import
(querqy.model BooleanParent BooleanQuery BoostQuery BoostedTerm Clause Clause$Occur DisjunctionMaxQuery ExpandedQuery Input$SimpleInput MatchAllQuery QuerqyQuery Query Term)))
(querqy.model BooleanParent BooleanQuery BoostQuery BoostedTerm Clause Clause$Occur DisjunctionMaxQuery ExpandedQuery Input$SimpleInput MatchAllQuery QuerqyQuery Query Term)))

(def should Clause$Occur/SHOULD)
(def must Clause$Occur/MUST)
Expand Down Expand Up @@ -112,7 +112,7 @@
BooleanQuery
(datafy [^BooleanQuery q]
{:type BooleanQuery
:occur (.getOccur q)
:occur (occur->kw (.getOccur q))
:clauses (mapv datafy (.getClauses q))})

BoostQuery
Expand Down
6 changes: 6 additions & 0 deletions test/com/nytimes/querqy/common-rules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ A8 =>

(A11 AND (NOT B11)) =>
UP(2): C11

(best AND netflix AND show) =>
UP(2): netflix

(best AND amazon AND show) =>
UP(2): amazon
Loading

0 comments on commit 76ed2b8

Please sign in to comment.