Skip to content


Add initial Cluster research
Browse files Browse the repository at this point in the history
  • Loading branch information
ptaoussanis committed Mar 2, 2023
1 parent 4c128e8 commit 8dd2670
Show file tree
Hide file tree
Showing 3 changed files with 494 additions and 158 deletions.
14 changes: 10 additions & 4 deletions src/taoensso/carmine_v4.clj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
[taoensso.carmine-v4.utils :as utils]
[taoensso.carmine-v4.opts :as opts]
[taoensso.carmine-v4.conns :as conns]
[taoensso.carmine-v4.sentinel :as sentinel]))
[taoensso.carmine-v4.sentinel :as sentinel]
[taoensso.carmine-v4.cluster :as cluster]))

(enc/assert-min-encore-version [3 39 0])

Expand All @@ -32,8 +33,9 @@

;;;; TODO

;; - Investigate Cluster
;; x Investigate Cluster
;; - Pause v4 work for now?
;; - Implement Cluster?

;; - Common & core util to parse-?marked-ba -> [<kind> <payload>]
;; - Core: new Pub/Sub API
Expand All @@ -53,7 +55,8 @@
;; - Refactor commands, add modules support
;; - Refactor pub/sub, etc. (note RESP2 vs RESP3 differences)
;; - Refactor helpers API, etc.
;; - Consider later refactoring mq?
;; - Modern MQ?
;; - Modern Tundra?

;; - Plan for ->v4 upgrade with back compatibility? ^{:deprecated <str>}
;; - v4 wiki with changes, migration, new features, examples, etc.
Expand Down Expand Up @@ -264,7 +267,10 @@
(enc/defalias conn-manager-init! conns/mgr-init!)
(enc/defalias conn-manager-ready? conns/mgr-ready?)
(enc/defalias conn-manager-close! conns/mgr-close!)
(enc/defalias conn-manager-master-changed! conns/mgr-master-changed!)))
(enc/defalias conn-manager-master-changed! conns/mgr-master-changed!))

(do ; Cluster
(enc/defalias cluster/cluster-key)))

;;;; Core API (main entry point to Carmine)

Expand Down
272 changes: 272 additions & 0 deletions src/taoensso/carmine_v4/cluster.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
(ns taoensso.carmine-v4.cluster
  "Private ns, implementation detail.
  Implementation of the Redis Cluster protocol."
  {:author "Peter Taoussanis (@ptaoussanis)"}
  (:require
   [clojure.test :as test :refer [deftest testing is]]
   [taoensso.encore :as enc :refer [have have? throws?]]
   ;;[taoensso.carmine-v4.utils :as utils]
   ;;[taoensso.carmine-v4.conns :as conns]
   [taoensso.carmine-v4.resp.common :as com]
   ;;[taoensso.carmine-v4.resp :as resp]
   ;;[taoensso.carmine-v4.opts :as opts]
   )

  (:import
   [java.util.concurrent.atomic AtomicLong]))

(comment
  ;; REPL-only helpers: wrapped in `comment` so that loading this namespace
  ;; never removes it or triggers a test run as a side effect.
  (remove-ns      'taoensso.carmine-v4.cluster)
  (test/run-tests 'taoensso.carmine-v4.cluster))

;;;; TODO
;; x Review Cluster docs, write initial sketch.
;; x Implement key-slot hashing.
;; x Data structure for slots-state?

;;;; 1st sketch

;; Without cluster:
;; - with-car [conn-opts]
;; - get-conn [conn-opts], with-conn
;; - With non-cluster ctx [in out]
;; - Flush any pending reqs to allow nesting
;; - New reqs -> pending reqs
;; - Flush
;; - Write reqs
;; - Read replies

;; With cluster:
;; - with-car [conn-opts]
;; - With cluster ctx [conn-opts]
;; - Flush any pending reqs to allow nesting
;; - New reqs -> pending reqs
;; - Flush
;; - get-conn [conn-opts], with-conn for each shard
;; - Write reqs
;; - Read replies

;; [ ] [<1 day]
;; conn-opts to incl {:keys [cluster-spec cluster-opts]} :server
;; - => Can use Sentinel or Cluster, not both
;; - cluster-spec constructor will take initial set of shard addrs
;; - cluster-opts to contain :conn-opts to use when updating state, etc.
;; - Ensure Sentinel conn-opts doesn't include Sentinel or Cluster server x
;; - Ensure Cluster conn-opts doesn't include Sentinel or Cluster server
;; - Ensure :select-db is nil/0 when using Cluster

;; Slots:
;; - Slot range is divided between different shards (shard-addrs)
;; - Each Req will have optional slot field
;; - Slots can be determined automatically for auto-generated commands:
;; - First arg after command name seems to usu. indicate "key".
;; - If there's any additional keys, their slots would anyway need to agree
;; - `redis-call` and co. expect `cluster-key` to be called manually on the appropriate arg

;; [ ] [1-2 days]
;; Stateful ClusterSpec:
;; - shards-state_: sorted map {[<slot-lo> <slot-hi>] {:master <addr> :replicas #{<addr>s}}}
;; - "Stable" when no ongoing reconfig (how to detect?)
;; - ^:private slot->shard-addr [spec parsed-cluster-opts slot]
;; - Check :prefer-read-replica? in cluster-opts
;; - Returns (or ?random-replica master)
;; - Some slots may not have shard-addr, even after updating state
;; - ^:public update-shards! [spec parsed-cluster-opts async?]
;; - Use locking and/or delay with timeout (fire future on CAS state->updating)
;; - Use :conn-opts in cluster-opts
;; - Use `CLUSTER SHARDS` or `CLUSTER SLOTS` command (support both)
;; - Stats incl.: n-shards, n-reshards, n-moved, n-ask etc.
;; - Cbs incl.: on-changed-shards, on-key-moved, etc.

;; [ ] [2-3 days]
;; Cluster specific flush [conn-opts] implementation:
;; - [1] First partition reqs into n target shards
;; - [1b] Check `cluster-slot` and `supports-cluster?` (must be true or nil)
;; - [2] Acquire conns to n target shards (use wcar conn-opts with injected shard-addr)
;; - [2b] If :prefer-read-replica? in cluster-opts -
;; Call READONLY/READWRITE (skipping replies)
;; - [*] Mention that we _could_ use fp to write & read to each shard simultaneously
;; - [3] Write to all shards
;; - [4] Read replies from all shards
;; - [5] If there's any -MOVED, -ASK, or conn (?) errors:
;; - Ask ClusterSpec to update-shards! (async?)
;; - Retry these reqs
;; - [6] Carefully stitch back replies in correct order
;; - [7] Ensure that nesting works as expected

;; Details on partitioning scheme (could be pure, data-oriented fn):
;; - Loop through all reqs in order
;; - If req -> slot -> shard-addr, add [req req-idx] to partition for that shard-addr
;; - If req -> nil, add [req req-idx] to last non-nil partition,
;; or buffer until first non-nil partition.
;; - If never any non-nil partition: choose random shard-addr.

;; Conn pooling:
;; - Pool to operate solely on [ip port] servers, injected by slot->addr in flush.
;; - I.e. pool needs no invalidation or kop-key changes.

;;;; Misc

;; - Would use Cluster or Sentinel, not both.
;; - Sentinel provides best availability, and some read perf via replicas.
;; - Cluster provides some availability, and read+write perf via sharding.

;; - Cluster supports all 1-key commands, and >1 key commands iff all keys
;; in same hash slot.
;; - Select command not allowed.
;; - Optional hash tags allow manual control of hash slots.

;; - Cluster "stable" when no ongoing reconfig (i.e. hash slots being moved)
;; - Each node has unique node-id
;; - Nodes can change IP without changing node-id (problematic?)

;; - Cluster has internal concept of {<slot> <node-id>}
;; - Client should store state like
;; {[<slot-lo> <slot-hi>] {:master <addr> :replicas #{<addr>s}}}, more
;; - To get cluster topology:
;; - CLUSTER SHARDS (Redis >= v7),
;; - CLUSTER SLOTS (Redis <= v6), deprecated
;; - Client cannot assume that all slots will be accounted for,
;; may need to re-fetch topology or try a random node
;; - Update topology when:
;; - Initially empty
;; - Any command saw a -MOVED error (use locking?)

;; - Possible Cluster errors:
;; - -MOVED => permanently moved
;; -MOVED 3999 <ip>:<port> ; (3999 = key slot) => try ip:port
;; -MOVED 3999 :6380 ; => unknown endpoint, try <same-ip>:port
;; - On redirection error:
;; - Either update cache for specific slot, or whole topology
;; - Prefer whole topology (since one move usu. => more)
;; - ASK =>
;; - Send this query (ONCE) to specified endpoint, don't update cache
;; - Start redirected query with ASKING
;; - TRYAGAIN => reshard in progress, wait to retry or throw

;; - Possible READONLY / READWRITE commands during :init?
;; (Nb should affect kop-key)

;; - Redis v7+ "Sharded pub/sub" implications?

;;;; Key slots

;; Total number of key slots: `ba->key-slot` reduces the CRC16 hash of a key
;; modulo this value, so valid slots are in the range [0, num-key-slots).
(def ^:private ^:const num-key-slots 16384)
(let [;; 256-entry lookup table for the CRC16 XMODEM variant (polynomial
      ;; 0x1021, MSB-first). Entry `i` is the CRC of the single byte `i`,
      ;; generated here instead of being hard-coded.
      ^longs xmodem-crc16-lookup
      (let [table (long-array 256)]
        (dotimes [idx 256]
          (aset table idx
            (long
              (loop [bit 0
                     crc (bit-shift-left idx 8)]
                (if (>= bit 8)
                  crc
                  (let [shifted (bit-shift-left crc 1)]
                    (recur (unchecked-inc bit)
                      (bit-and 0xffff
                        (if (zero? (bit-and crc 0x8000))
                          shifted
                          (bit-xor shifted 0x1021))))))))))
        table)]

  (defn- crc16
    "Returns hash for given bytes using the Redis Cluster CRC16 algorithm
    (XMODEM variant: 16-bit width, polynomial 0x1021, initial value 0),
    Ref. Redis Cluster specification (Appendix A).
    Returns a long in the range [0, 0xffff]; returns 0 for an empty array.
    Thanks to @bpoweski for this implementation."
    [^bytes ba]
    (let [len (alength ba)]
      (loop [n   0
             crc 0] ; Inlines faster than `enc/reduce-n`
        (if (>= n len)
          crc ; All bytes consumed
          (recur (unchecked-inc n)
            (bit-xor (bit-and (bit-shift-left crc 8) 0xffff)
              (aget xmodem-crc16-lookup
                (-> (bit-shift-right crc 8)
                    (bit-xor (aget ba n))
                    (bit-and 0xff))))))))))

(defn- ba->key-slot
  "Returns the key slot for the given byte array: its `crc16` hash modulo
  `num-key-slots`."
  [^bytes ba]
  (-> ba crc16 (mod num-key-slots)))
(defn- tag-str->key-slot
  "Returns the key slot for the given hash-tag string, computed from its
  bytes as produced by `com/str->bytes`."
  [^String tag-str]
  (let [tag-ba (com/str->bytes tag-str)]
    (ba->key-slot tag-ba)))

(defprotocol IClusterKey
  (^:public cluster-key [redis-key]
    "Returns a `ClusterKey` for the given Redis key, pairing the key's bytes
    with its key slot. Byte arrays are hashed in full, strings honour a
    non-empty `{tag}` when present, and an existing `ClusterKey` is returned
    as-is."))

(deftype ClusterKey [^bytes ba ^long slot]
  IClusterKey
  (cluster-key [this] this) ; Already a cluster key, nothing to do

  clojure.lang.IDeref
  (deref [_] slot)) ; For tests

;; Byte-array keys: the slot is computed from the entire array.
(extend-type (Class/forName "[B")
  IClusterKey
  (cluster-key [key-ba]
    (let [slot (ba->key-slot key-ba)]
      (ClusterKey. key-ba slot))))

;; String keys: when the key contains a non-empty hash tag (the text between
;; the first "{" and the first following "}"), only the tag is hashed so that
;; related keys can be forced into the same slot. Otherwise the whole key is
;; hashed.
(extend-type String
  IClusterKey
  (cluster-key [s]
    (let [s-ba (com/str->bytes s)]
      (if-let [tag-str
               (when (enc/str-contains? s "{") ; Cheap pre-check before regex
                 ;; `(?s)` lets `.` match line terminators, so a tag that
                 ;; contains a newline is still recognized as a tag.
                 (when-let [match (re-find #"(?s)\{(.*?)\}" s)]
                   (when-let [^String tag (get match 1)] ; "bar" in "foo{bar}{baz}"
                     (when-not (.isEmpty tag) tag))))] ; "{}" => hash whole key

        (ClusterKey. s-ba (tag-str->key-slot tag-str))
        (ClusterKey. s-ba (ba->key-slot      s-ba))))))

(deftest ^:private _key-slots
  ;; A plain key, a key with a hash tag, and an already-wrapped key must all
  ;; resolve to the slot of "foo".
  (let [slot-of (fn [k] (deref (cluster-key k)))]
    [(is (= (slot-of "foo")                       12182))
     (is (= (slot-of "ignore{foo}")               12182))
     (is (= (slot-of (cluster-key "ignore{foo}")) 12182))]))

(defn cluster-slot
  "Returns the key slot of `x` when it is a `ClusterKey`, otherwise nil."
  [x]
  (if (instance? ClusterKey x)
    (.-slot ^ClusterKey x)
    nil))

(comment
  ;; REPL-only micro-benchmark, kept inside `comment` so that it never runs
  ;; when the namespace is loaded.
  (enc/qb 1e5 ; [51.42 73.96]
    (cluster-key "foo")
    (cluster-key "ignore{foo}")))


;; Example slots-state sketch: sorted map of [<slot-lo> <slot-hi>] -> value.
(def sm
  (sorted-map
    [12 30] "a"
    [16 18] "b"))

(defn find-entry
  "Given a map `sm` of {[<lo> <hi>] <val>} and a long `n`, returns the value
  of the first entry (in the map's iteration order) whose inclusive range
  contains `n`, or nil when no range contains `n`."
  [sm ^long n]
  (reduce-kv
    (fn [acc lohi v]
      (if (and
            (>= n ^long (get lohi 0))
            (<= n ^long (get lohi 1)))
        (reduced v) ; Stop at first matching range
        acc))
    nil sm))

(comment (find-entry sm 16)) ; => "a"

0 comments on commit 8dd2670

Please sign in to comment.