@@ -145,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
   readonly providerId: string = 'llamacpp'
 
   private config: LlamacppConfig
-  private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
   private pendingDownloads: Map<string, Promise<void>> = new Map()
@@ -771,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
 
   override async onUnload(): Promise<void> {
     // Terminate all active sessions
-    for (const [_, sInfo] of this.activeSessions) {
-      try {
-        await this.unload(sInfo.model_id)
-      } catch (error) {
-        logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
-      }
-    }
-
-    // Clear the sessions map
-    this.activeSessions.clear()
   }
 
   onSettingUpdate<T>(key: string, value: T): void {
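Note on the now-empty `onUnload`: with per-session bookkeeping moved out of this file, process teardown is presumably owned by the Rust backend. If explicit teardown were still wanted on the TypeScript side, a minimal sketch (not part of this commit, drop-in method body for this class, using only methods that remain in this file) could look like:

```ts
// Sketch only: rebuild the old cleanup loop on top of the new backend-backed helpers.
override async onUnload(): Promise<void> {
  for (const modelId of await this.getLoadedModels()) {
    try {
      await this.unload(modelId)
    } catch (error) {
      logger.error(`Failed to unload model ${modelId}:`, error)
    }
  }
}
```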
@@ -1104,75 +1093,21 @@ export default class llamacpp_extension extends AIEngine {
   * Function to find a random port
   */
   private async getRandomPort(): Promise<number> {
-    const MAX_ATTEMPTS = 20000
-    let attempts = 0
-
-    while (attempts < MAX_ATTEMPTS) {
-      const port = Math.floor(Math.random() * 1000) + 3000
-
-      const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
-        (info) => info.port === port
-      )
-
-      if (!isAlreadyUsed) {
-        const isAvailable = await invoke<boolean>('is_port_available', { port })
-        if (isAvailable) return port
-      }
-
-      attempts++
-    }
-
-    throw new Error('Failed to find an available port for the model to load')
-  }
-
-  private async sleep(ms: number): Promise<void> {
-    return new Promise((resolve) => setTimeout(resolve, ms))
-  }
-
-  private async waitForModelLoad(
-    sInfo: SessionInfo,
-    timeoutMs = 240_000
-  ): Promise<void> {
-    await this.sleep(500) // Wait before first check
-    const start = Date.now()
-    while (Date.now() - start < timeoutMs) {
-      try {
-        const res = await fetch(`http://localhost:${sInfo.port}/health`)
-
-        if (res.status === 503) {
-          const body = await res.json()
-          const msg = body?.error?.message ?? 'Model loading'
-          logger.info(`waiting for model load... (${msg})`)
-        } else if (res.ok) {
-          const body = await res.json()
-          if (body.status === 'ok') {
-            return
-          } else {
-            logger.warn('Unexpected OK response from /health:', body)
-          }
-        } else {
-          logger.warn(`Unexpected status ${res.status} from /health`)
-        }
-      } catch (e) {
-        await this.unload(sInfo.model_id)
-        throw new Error(`Model appears to have crashed: ${e}`)
-      }
-
-      await this.sleep(800) // Retry interval
+    try {
+      const port = await invoke<number>('get_random_port')
+      return port
+    } catch {
+      logger.error('Unable to find a suitable port')
+      throw new Error('Unable to find a suitable port for model')
     }
-
-    await this.unload(sInfo.model_id)
-    throw new Error(
-      `Timed out loading model after ${timeoutMs} ... killing llamacpp`
-    )
   }
 
   override async load(
     modelId: string,
     overrideSettings?: Partial<LlamacppConfig>,
     isEmbedding: boolean = false
   ): Promise<SessionInfo> {
-    const sInfo = this.findSessionByModel(modelId)
+    const sInfo = await this.findSessionByModel(modelId)
     if (sInfo) {
       throw new Error('Model already loaded!!')
     }
@@ -1342,11 +1277,6 @@ export default class llamacpp_extension extends AIEngine {
         libraryPath,
         args,
       })
-
-      // Store the session info for later use
-      this.activeSessions.set(sInfo.pid, sInfo)
-      await this.waitForModelLoad(sInfo)
-
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
@@ -1355,13 +1285,12 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   override async unload(modelId: string): Promise<UnloadResult> {
-    const sInfo: SessionInfo = this.findSessionByModel(modelId)
+    const sInfo: SessionInfo = await this.findSessionByModel(modelId)
     if (!sInfo) {
       throw new Error(`No active session found for model: ${modelId}`)
     }
     const pid = sInfo.pid
     try {
-      this.activeSessions.delete(pid)
 
       // Pass the PID as the session_id
       const result = await invoke<UnloadResult>('unload_llama_model', {
@@ -1373,13 +1302,11 @@ export default class llamacpp_extension extends AIEngine {
         logger.info(`Successfully unloaded model with PID ${pid}`)
       } else {
         logger.warn(`Failed to unload model: ${result.error}`)
-        this.activeSessions.set(sInfo.pid, sInfo)
       }
 
       return result
     } catch (error) {
       logger.error('Error in unload command:', error)
-      this.activeSessions.set(sInfo.pid, sInfo)
       return {
         success: false,
         error: `Failed to unload model: ${error}`,
@@ -1502,17 +1429,21 @@ export default class llamacpp_extension extends AIEngine {
     }
   }
 
-  private findSessionByModel(modelId: string): SessionInfo | undefined {
-    return Array.from(this.activeSessions.values()).find(
-      (session) => session.model_id === modelId
-    )
+  private async findSessionByModel(modelId: string): Promise<SessionInfo> {
+    try {
+      let sInfo = await invoke<SessionInfo>('find_session_by_model', { modelId })
+      return sInfo
+    } catch (e) {
+      logger.error(e)
+      throw new Error(e)
+    }
   }
 
   override async chat(
     opts: chatCompletionRequest,
     abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
-    const sessionInfo = this.findSessionByModel(opts.model)
+    const sessionInfo = await this.findSessionByModel(opts.model)
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
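One behavioral detail in the hunk above: the old `findSessionByModel` returned `undefined` for a model with no session, while the new version rejects whenever the `find_session_by_model` invoke fails, so guards like `if (!sessionInfo)` in `chat()` and `embed()` may never be reached. A minimal sketch of a variant that preserves the old "undefined when not loaded" contract (assuming the backend command rejects for missing sessions, which this diff does not confirm) could be:

```ts
// Sketch only: swallow the rejection and restore the old "not loaded" signal.
private async findSessionByModel(
  modelId: string
): Promise<SessionInfo | undefined> {
  try {
    // Backend command introduced by this change.
    return await invoke<SessionInfo>('find_session_by_model', { modelId })
  } catch (e) {
    logger.error(e)
    return undefined // callers keep their existing !sessionInfo checks
  }
}
```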
@@ -1528,7 +1459,6 @@ export default class llamacpp_extension extends AIEngine {
         throw new Error('Model appears to have crashed! Please reload!')
       }
     } else {
-      this.activeSessions.delete(sessionInfo.pid)
       throw new Error('Model have crashed! Please reload!')
     }
     const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@@ -1577,11 +1507,13 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   override async getLoadedModels(): Promise<string[]> {
-    let lmodels: string[] = []
-    for (const [_, sInfo] of this.activeSessions) {
-      lmodels.push(sInfo.model_id)
-    }
-    return lmodels
+    try {
+      let models: string[] = await invoke<string[]>('get_loaded_models')
+      return models
+    } catch (e) {
+      logger.error(e)
+      throw new Error(e)
+    }
   }
 
   async getDevices(): Promise<DeviceList[]> {
@@ -1611,7 +1543,7 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   async embed(text: string[]): Promise<EmbeddingResponse> {
-    let sInfo = this.findSessionByModel('sentence-transformer-mini')
+    let sInfo = await this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
       const downloadedModelList = await this.list()
       if (
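Taken together, this change drops the extension's in-memory `activeSessions` map and delegates session tracking to the Tauri backend. The commands and shapes below are inferred only from the `invoke` calls visible in this diff; the import path assumes a Tauri v2 layout, and the field lists cover only what this file actually reads:

```ts
// Inferred backend surface (sketch, not the authoritative definitions).
import { invoke } from '@tauri-apps/api/core'

// Only the fields referenced by this extension are listed here.
interface SessionInfo {
  pid: number
  port: number
  model_id: string
}

interface UnloadResult {
  success: boolean
  error?: string
}

const backend = {
  // Backend now picks a free port instead of the old 3000-3999 scan.
  getRandomPort: () => invoke<number>('get_random_port'),
  // Rejects or resolves per the backend's contract; see note above.
  findSessionByModel: (modelId: string) =>
    invoke<SessionInfo>('find_session_by_model', { modelId }),
  getLoadedModels: () => invoke<string[]>('get_loaded_models'),
  // 'unload_llama_model' is also invoked in this diff, but its payload
  // (the PID passed as the session_id) is cut off above, so it is not
  // reproduced here.
}
```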