4 files changed: +20 -6 lines changed
Original file line number Diff line number Diff line change @@ -334,9 +334,13 @@ spec:
334334 value : " {{ .Release.Name }}-router-service:{{ $modelSpec.lmcacheConfig.controllerPort }}"
335335 {{- end }}
336336 {{- if hasKey $modelSpec.lmcacheConfig "workerPort" }}
337- - name : LMCACHE_WORKER_PORT
337+ - name : LMCACHE_LMCACHE_WORKER_PORT
338338 value : {{ $modelSpec.lmcacheConfig.workerPort | quote }}
339339 {{- end }}
340+ {{- if hasKey $modelSpec.lmcacheConfig "distributedUrl" }}
341+ - name : LMCACHE_DISTRIBUTED_URL
342+ value : {{ $modelSpec.lmcacheConfig.distributedUrl | quote }}
343+ {{- end }}
340344 {{- end }}
341345 {{- if or .Values.servingEngineSpec.configs $modelSpec.envFromSecret }}
342346 envFrom :
Original file line number Diff line number Diff line change @@ -39,10 +39,10 @@ default = [] # leave this empty because pip requires at least one specifier
3939semantic_cache = [
4040 " sentence-transformers==2.2.2" ,
4141 " faiss-cpu==1.10.0" ,
42- " huggingface-hub==0.25.2 " , # downgrade to 0.25.2 to avoid breaking changes
42+ " huggingface-hub==0.34.0 " ,
4343]
4444lmcache = [
45- " lmcache==0.2.1 " ,
45+ " lmcache==0.3.5 " ,
4646]
4747
4848[build-system ]
Original file line number Diff line number Diff line change 1818import math
1919import random
2020import threading
21+ import uuid
2122from typing import Dict , List
2223
2324from fastapi import Request
@@ -289,7 +290,9 @@ async def route_request(
289290 url = endpoints [0 ].url + "/tokenize"
290291 # TODO (Yuhan): Handle chat completions
291292 token_ids = self .tokenizer .encode (request_json ["prompt" ])
292- msg = LookupMsg (tokens = token_ids )
293+ event_id = "Lookup" + str (uuid .uuid4 ())
294+ logger .debug (f"Lookup event id: { event_id } " )
295+ msg = LookupMsg (tokens = token_ids , event_id = event_id )
293296 instance_id = await self .query_manager (msg )
294297 matched_tokens = math .inf
295298 if len (list (instance_id .layout_info .keys ())) > 0 :
@@ -321,10 +324,13 @@ async def route_request(
321324 queried_instance_ids = [info for info in instance_id .layout_info ]
322325 if queried_instance_ids [0 ] not in self .instance_id_to_ip :
323326 for endpoint in endpoints :
327+ event_id = "QueryInst" + str (uuid .uuid4 ())
328+ logger .debug (f"QueryInst event id: { event_id } " )
324329 query_message = QueryInstMsg (
325330 ip = endpoint .url .split (f":{ endpoint .url .split (':' )[- 1 ]} " )[
326331 0
327- ].split ("//" )[1 ]
332+ ].split ("//" )[1 ],
333+ event_id = event_id ,
328334 )
329335 endpoint_instance_id = await self .query_manager (query_message )
330336
Original file line number Diff line number Diff line change @@ -21,6 +21,7 @@ servingEngineSpec:
2121 instanceId : " default1"
2222 controllerPort : " 9000"
2323 workerPort : 8001
24+ distributedUrl : " localhost:8201"
2425
2526 env :
2627 - name : LMCACHE_LOG_LEVEL
@@ -46,6 +47,7 @@ servingEngineSpec:
4647 instanceId : " default2"
4748 controllerPort : " 9000"
4849 workerPort : 8002
50+ distributedUrl : " localhost:8202"
4951
5052 env :
5153 - name : LMCACHE_LOG_LEVEL
@@ -72,6 +74,7 @@ servingEngineSpec:
7274 instanceId : " default3"
7375 controllerPort : " 9000"
7476 workerPort : 8003
77+ distributedUrl : " localhost:8203"
7578
7679 env :
7780 - name : LMCACHE_LOG_LEVEL
@@ -97,6 +100,7 @@ servingEngineSpec:
97100 instanceId : " default4"
98101 controllerPort : " 9000"
99102 workerPort : 8004
103+ distributedUrl : " localhost:8204"
100104
101105 env :
102106 - name : LMCACHE_LOG_LEVEL
@@ -105,7 +109,7 @@ servingEngineSpec:
105109
106110routerSpec :
107111 repository : " lmcache/lmstack-router"
108- tag : " kvaware- latest"
112+ tag : " latest"
109113 resources :
110114 requests :
111115 cpu : " 1"
You can’t perform that action at this time.
0 commit comments