Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#4] Fix clobbered boolean rules #5

Merged
merged 4 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cljfmt.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{:sort-ns-references? true
:remove-consecutive-blank-lines? false
:extra-indents
{}}
54 changes: 1 addition & 53 deletions dev/user.clj
Original file line number Diff line number Diff line change
@@ -1,53 +1 @@
(ns user
(:require [com.nytimes.querqy.commonrules :as c]
[com.nytimes.querqy :as q]
[clojure.math :as math]))

(defn pin
"Pin the given IDs to the top of the result set in the order given. Should only
be used once within a given match rule."
[& ids]
(map-indexed
(fn [idx id]
(c/boost (- Float/MAX_VALUE idx) {:ids {:values [id]}}))
ids))

;; Let's assume we have some special thanksgiving related content that editorial
;; wants highly promoted, the documents with IDs 12345 and 5678. Rather than
;; tweak scores or sprinkle boosts around our rule set, we can instead use our
;; new pin rule which makes clear the intent of the boost and serves as
;; documentation for what we're trying to achieve with this rule.
(def rules
(c/rules-rewriter
(c/match "thanksgiving"
(pin "12345" "5678"))))

;; We can now emit a query which pins results to the top.

(def opts {:match/fields ["headline"]})

(q/emit (q/rewrite rules "thanksgiving recipes") opts)

{:function_score
{:query
{:bool
{:must [],
:should
[{:match {"headline" {:query "thanksgiving"}}}
{:match {"headline" {:query "recipes"}}}],
:must_not [],
:filter []}},
:functions
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}}

(def base
{:function_score
{:query
{:bool {:must [],
:should [],
:must_not [],
:filter []}},
:functions
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}})
(ns user)
143 changes: 78 additions & 65 deletions src/com/nytimes/querqy/commonrules.clj
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
(querqy.rewrite.commonrules.select.booleaninput BooleanInputParser)
(querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral)))

(set! *warn-on-reflection* true)

(defprotocol CommonRulesRewriterBuilder
(common-rules-rewriter* [this]))

Expand All @@ -27,12 +29,6 @@
(common-rules-rewriter* [_]
(throw (IllegalArgumentException. "Must provide rules to rules-rewriter"))))

(defn- flatten-rules
[fns]
(reduce (fn [accm afn] (if (coll? afn) (into accm afn) (conj accm afn)))
[]
fns))

(defn rules-rewriter
"Create a CommonRulesRewriter.

Expand All @@ -41,7 +37,7 @@
[& args]
(if (and (= 1 (count args)) (instance? URL (first args)))
(common-rules-rewriter* (first args))
(common-rules-rewriter* (flatten-rules args))))
(common-rules-rewriter* args)))

(defn rewriter-factory
[rules]
Expand Down Expand Up @@ -77,36 +73,51 @@
;; ----------------------------------------------------------------------
;; DSL

(extend-protocol CommonRulesRewriterBuilder
List
(common-rules-rewriter* [rules]
(let [rules-builder (TrieMapRulesCollectionBuilder. true)]
(doseq [rule-fn rules]
(rule-fn rules-builder))
(rewriter-factory (.build rules-builder)))))

(def ^:dynamic ^QuerqyParser *query-parser* parser/whitespace-parser)

(declare match*)
(defrecord Rule [input instructions])

(defn match*
"Create a "
[head & tail]
(->Rule head (vec tail)))

(defmacro match
"Create a match rule."
{:style/indent 1}
;; TODO LEFT/ RIGHT boundaries
"Create a rewriter rule from matching text input or boolean input followed by
any number of query transformations.

```clojure
;; Inject bar as a synonym to foo in any query.
(match \"foo\"
(synonym \"bar\"))
```"
[head & tail]
`(match* '~head (vec (flatten (vector ~@tail)))))
`(match* '~head ~@tail))

(defn- parse-string [string] (mapv #(LineParser/parseTerm %) (str/split string #"\s+")))
(defn- parse-string
[string]
(mapv #(LineParser/parseTerm %) (str/split string #"\s+")))

(defn- parse-query [query]
(cond (string? query) (.parse *query-parser* query)
(map? query) (model/rawq {:query query})))
(defn- parse-query
[query]
(cond
(string? query)
(.parse *query-parser* query)

(map? query)
(model/rawq {:query query})))

(defn delete? [obj] (instance? DeleteInstruction obj))
(defn delete?
[obj]
(instance? DeleteInstruction obj))

(defn delete [string] (DeleteInstruction. (parse-string string)))
(defn delete
[string]
(DeleteInstruction. (parse-string string)))

(defn synonym? [obj] (instance? SynonymInstruction obj))
(defn synonym?
[obj]
(instance? SynonymInstruction obj))

(defn synonym
"Create a synonym instruction."
Expand Down Expand Up @@ -154,41 +165,43 @@
not (wrap (cons NOT (parse-boolean-input terms)))
(map parse-boolean-input input))))

;;

(def rule-count (atom 0))

(defn ^:no-doc match*
;; implements match for use by match macro
[input instructions]
(let [ord (swap! rule-count inc)
compiled (Instructions. ord ord instructions)]
(fn [^TrieMapRulesCollectionBuilder rules-builder]
(cond
;; string rules
(string? input)
(let [simple-input (Input/parseSimpleInput input)]
(.addRule rules-builder
^Input$SimpleInput simple-input
^Instructions compiled))

;; boolean rules
(list? input)
(do
(when (some delete? instructions)
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))
(when (some synonym? instructions)
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))
(let [boolean-input-parser (BooleanInputParser.)
bool-input (Input$BooleanInput. (parse-boolean-input input)
boolean-input-parser
(pr-str input))]

(.applyInstructions bool-input compiled rules-builder)
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
(let [input (LineParser/parseInput (str/join \space (.getTerms literal)))]
(.addRule rules-builder
^Input$SimpleInput input
^BooleanInputLiteral literal)))))

:else (throw (IllegalArgumentException. "Can only parse a string or list as input"))))))
(defn parse-simple-input
^Input$SimpleInput
[string]
(Input/parseSimpleInput string))

(extend-protocol CommonRulesRewriterBuilder
List
(common-rules-rewriter* [rules]
(let [rules (flatten rules)
counter (atom 0)
rules-builder (TrieMapRulesCollectionBuilder. true)
boolean-input-parser (BooleanInputParser.)]
(doseq [{:keys [input instructions]} rules]
(let [ord (swap! counter inc)
id (str input "#" ord)
instructions (Instructions. ord id instructions)]
(cond
(string? input)
(.addRule rules-builder (parse-simple-input input) instructions)

(list? input)
(do
(when (some delete? instructions)
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))

(when (some synonym? instructions)
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))

(let [bool-input (Input$BooleanInput. (parse-boolean-input input) boolean-input-parser (pr-str input))]
;; inputPattern.applyInstructions(instructions, builder);
(.applyInstructions bool-input instructions rules-builder))))))

;; Add boolean literals at the end
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
(let [string (str/join \space (.getTerms literal))
input (parse-simple-input string)]
(.addRule rules-builder input literal)))

;;
(rewriter-factory (.build rules-builder)))))
2 changes: 1 addition & 1 deletion src/com/nytimes/querqy/model.clj
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
BooleanQuery
(datafy [^BooleanQuery q]
{:type BooleanQuery
:occur (.getOccur q)
:occur (occur->kw (.getOccur q))
:clauses (mapv datafy (.getClauses q))})

BoostQuery
Expand Down
6 changes: 6 additions & 0 deletions test/com/nytimes/querqy/common-rules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ A8 =>

(A11 AND (NOT B11)) =>
UP(2): C11

(best AND netflix AND show) =>
UP(2): netflix

(best AND amazon AND show) =>
UP(2): amazon
Loading