Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,16 @@
"key": "ctrl+x",
"when": "editorTextFocus"
},
{
"command": "extension.selectNextSuggestion",
"key": "alt+]",
"when": "editorTextFocus && inlineSuggestionVisible"
},
{
"command": "extension.selectPreviousSuggestion",
"key": "alt+[",
"when": "editorTextFocus && inlineSuggestionVisible"
},
{
"command": "extension.acceptFirstLine",
"key": "shift+tab",
Expand Down Expand Up @@ -271,6 +281,11 @@
"default": "",
"description": "The URL to be used by the extension for creating embeddings."
},
"llama-vscode.max_parallel_completions": {
"type": "number",
"default": 3,
"description": "The max number of parallel completions. Switching between completions could be done with Alt+] (next) or Alt =+[ (previous). "
},
"llama-vscode.new_completion_model_port": {
"type": "number",
"default": 8012,
Expand Down
203 changes: 89 additions & 114 deletions resources/help.md

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions src/architect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,36 @@ export class Architect {
context.subscriptions.push(changeActiveTextEditorDisp)
}

registerCommandSelectNextSuggestion = (context: vscode.ExtensionContext) => {
    // Registers the command that cycles forward through the parallel inline
    // suggestions (bound to Alt+] in package.json).
    const disposable = vscode.commands.registerCommand(
        'extension.selectNextSuggestion',
        async () => {
            // Only meaningful while a text editor is focused.
            if (!vscode.window.activeTextEditor) return;
            // Let VS Code flip to the next inline suggestion, then advance our
            // own index so accept-word / accept-line act on the visible one.
            await vscode.commands.executeCommand('editor.action.inlineSuggest.showNext');
            await this.app.completion.increaseSuggestionIndex();
        }
    );
    context.subscriptions.push(disposable);
}

registerCommandSelectPreviousSuggestion = (context: vscode.ExtensionContext) => {
    // Registers the command that cycles backward through the parallel inline
    // suggestions (bound to Alt+[ in package.json).
    const disposable = vscode.commands.registerCommand(
        'extension.selectPreviousSuggestion',
        async () => {
            // Only meaningful while a text editor is focused.
            if (!vscode.window.activeTextEditor) return;
            // Let VS Code flip to the previous inline suggestion, then rewind
            // our own index so accept-word / accept-line act on the visible one.
            await vscode.commands.executeCommand('editor.action.inlineSuggest.showPrevious');
            await this.app.completion.decreaseSuggestionIndex();
        }
    );
    context.subscriptions.push(disposable);
}

registerCommandAcceptFirstLine = (context: vscode.ExtensionContext) => {
const acceptFirstLineCommand = vscode.commands.registerCommand(
'extension.acceptFirstLine',
Expand Down
138 changes: 99 additions & 39 deletions src/completion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ import vscode from "vscode";
import {Utils} from "./utils";

/**
 * Snapshot of the most recent inline-completion round.
 *
 * completions — every candidate suggestion returned for the position.
 * complIndex  — index of the candidate currently displayed; moved by
 *               increaseSuggestionIndex / decreaseSuggestionIndex when the
 *               user cycles suggestions with Alt+] / Alt+[.
 * position / inputPrefix / inputSuffix / prompt — the request context the
 *               candidates were generated for.
 */
interface CompletionDetails {
    completions: string[];
    position: vscode.Position;
    inputPrefix: string;
    inputSuffix: string;
    prompt: string;
    complIndex: number;
}

export class Completion {
private app: Application
private isRequestInProgress = false
isForcedNewRequest = false
lastCompletion: CompletionDetails = {completion: "", position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""};
lastCompletion: CompletionDetails = {completions: [], complIndex: 0, position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""};

constructor(application: Application) {
this.app = application;
Expand Down Expand Up @@ -66,8 +67,8 @@ export class Completion {
try {
let data: LlamaResponse | undefined
let hashKey = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + prompt)
let completion = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt)
let isCachedResponse = !this.isForcedNewRequest && completion != undefined
let completions = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt)
let isCachedResponse = !this.isForcedNewRequest && completions != undefined
if (!isCachedResponse) {
this.isForcedNewRequest = false
if (token.isCancellationRequested){
Expand All @@ -78,46 +79,55 @@ export class Completion {
this.app.statusbar.showThinkingInfo();

data = await this.app.llamaServer.getFIMCompletion(inputPrefix, inputSuffix, prompt, this.app.extraContext.chunks, nindent)
if (data != undefined) completion = data.content;
else completion = undefined
if (data != undefined) completions = this.getComplFromContent(data);
else completions = undefined
}
if (completion == undefined || completion.trim() == ""){
if (completions == undefined || completions.length == 0){
this.app.statusbar.showInfo(undefined);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "NO_SUGGESTION_RETURN", "")
return [];
}

let suggestionLines = completion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);

if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) {
let newCompletions: string[] = []
let firstComplLines: string[] = []
for (let compl of completions){
let suggestionLines = compl.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);

if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) {
continue
} else {
compl = this.updateSuggestion(suggestionLines, lineSuffix);
newCompletions.push(compl);
if (firstComplLines.length == 0) firstComplLines = suggestionLines;
}
}
if (newCompletions.length == 0){
this.app.statusbar.showInfo(undefined);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "")
return [];
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "")
return [];
}

completion = this.updateSuggestion(suggestionLines, lineSuffix);

if (!isCachedResponse) this.app.lruResultCache.put(hashKey, completion)
this.lastCompletion = this.getCompletionDetails(completion, position, inputPrefix, inputSuffix, prompt);
if (!isCachedResponse && newCompletions) this.app.lruResultCache.put(hashKey, newCompletions)
this.lastCompletion = this.getCompletionDetails(newCompletions, position, inputPrefix, inputSuffix, prompt);

// Run async as not needed for the suggestion
setTimeout(async () => {
if (isCachedResponse) this.app.statusbar.showCachedInfo()
else this.app.statusbar.showInfo(data);
if (!token.isCancellationRequested && lineSuffix.trim() === ""){
await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines);
await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines);
await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, firstComplLines);
await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, firstComplLines);
}
if (!token.isCancellationRequested){
this.app.extraContext.addFimContextChunks(position, context, document);
}
}, 0);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "NORMAL_RETURN", suggestionLines[0])
return [this.getCompletion(this.removeLeadingSpaces(completion, spacesToRemove), position)];
this.app.logger.addEventLog(group, "NORMAL_RETURN", firstComplLines[0])
return this.getCompletion(newCompletions||[], position, spacesToRemove);
} catch (err) {
console.error("Error fetching llama completion:", err);
vscode.window.showInformationMessage(this.app.configuration.getUiText(`Error getting response. Please check if llama.cpp server is running.`)??"");
Expand Down Expand Up @@ -155,21 +165,36 @@ export class Completion {
let promptCut = prompt.slice(i)
let hash = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + newPrompt)
let result = this.app.lruResultCache.get(hash)
if (result != undefined && promptCut == result.slice(0,promptCut.length)) return result.slice(prompt.length - newPrompt.length)
if (result == undefined) continue
let completions: string[] = []
for (const compl of result){
if (compl && promptCut == compl.slice(0,promptCut.length)) {
completions.push(compl.slice(prompt.length - newPrompt.length))
}
}
if (completions.length > 0) return completions;
}

return undefined
}

getCompletion = (completion: string, position: vscode.Position) => {
return new vscode.InlineCompletionItem(
completion,
new vscode.Range(position, position)
);
getCompletion = (completions: string[],
position: vscode.Position,
spacesToRemove: number): vscode.InlineCompletionItem[] => {
let completionItems: vscode.InlineCompletionItem[] = []
for (const completion of completions){
const compl: vscode.InlineCompletionItem = new vscode.InlineCompletionItem(
this.removeLeadingSpaces(completion, spacesToRemove),
new vscode.Range(position, position)
)
completionItems.push(compl);
}

return completionItems;
}

// Builds a fresh CompletionDetails snapshot for the given request context.
// The displayed candidate always starts at index 0 until the user cycles
// with Alt+] / Alt+[.
private getCompletionDetails = (completions: string[], position: vscode.Position, inputPrefix: string, inputSuffix: string, prompt: string) => {
    return { completions, complIndex: 0, position, inputPrefix, inputSuffix, prompt };
}

// logic for discarding predictions that repeat existing text
Expand Down Expand Up @@ -241,14 +266,17 @@ export class Completion {
let cached_completion = this.app.lruResultCache.get(futureHashKey)
if (cached_completion != undefined) return;
let futureData = await this.app.llamaServer.getFIMCompletion(futureInputPrefix, futureInputSuffix, futurePrompt, this.app.extraContext.chunks, prompt.length - prompt.trimStart().length);
let futureSuggestion = "";
let futureSuggestions = [];
if (futureData != undefined && futureData.content != undefined && futureData.content.trim() != "") {
futureSuggestion = futureData.content;
let suggestionLines = futureSuggestion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);
futureSuggestion = suggestionLines.join('\n')
let suggestions = this.getComplFromContent(futureData);
for (let futureSuggestion of suggestions||[]){
let suggestionLines = futureSuggestion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);
futureSuggestion = suggestionLines.join('\n')
futureSuggestions.push(futureSuggestion)
}
let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt);
this.app.lruResultCache.put(futureHashKey, futureSuggestion);
this.app.lruResultCache.put(futureHashKey, futureSuggestions);
}
}

Expand All @@ -262,13 +290,13 @@ export class Completion {
let futureSuggestion = suggestionLines.slice(1).join('\n')
let cached_completion = this.app.lruResultCache.get(futureHashKey)
if (cached_completion != undefined) return;
else this.app.lruResultCache.put(futureHashKey, futureSuggestion)
else this.app.lruResultCache.put(futureHashKey, [futureSuggestion])
}
}

insertNextWord = async (editor: vscode.TextEditor) => {
// Retrieve the last inline completion item
const lastSuggestion = this.lastCompletion.completion;
const lastSuggestion = this.lastCompletion.completions[this.lastCompletion.complIndex];
if (!lastSuggestion) {
return;
}
Expand All @@ -294,7 +322,7 @@ export class Completion {

insertFirstLine = async (editor: vscode.TextEditor) => {
// Retrieve the last inline completion item
const lastItem = this.lastCompletion.completion;
const lastItem = this.lastCompletion.completions[this.lastCompletion.complIndex];
if (!lastItem) {
return;
}
Expand All @@ -311,4 +339,36 @@ export class Completion {
editBuilder.insert(position, insertLine);
});
}

// Advances to the next cached suggestion, wrapping back to the first after
// the last. No-op when there are no cached completions.
increaseSuggestionIndex = async () => {
    const total = this.lastCompletion.completions.length;
    if (total === 0) return;
    this.lastCompletion.complIndex = (this.lastCompletion.complIndex + 1) % total;
}

// Steps back to the previous cached suggestion, wrapping to the last one
// when stepping back from the first. No-op when there are no completions.
decreaseSuggestionIndex = async () => {
    const total = this.lastCompletion.completions.length;
    if (total === 0) return;
    this.lastCompletion.complIndex = (this.lastCompletion.complIndex - 1 + total) % total;
}

/**
 * Normalizes a llama.cpp completion response into a list of suggestion
 * strings.
 * - Single-completion responses ({ content: string }) yield a one-element
 *   array, or [] when the content is missing/blank.
 * - Multi-completion responses (array of { content }) are de-duplicated
 *   while preserving first-seen order.
 */
private getComplFromContent(codeCompletions: any): string[] | undefined {
    if (codeCompletions != undefined && "content" in codeCompletions) {
        // BUG FIX: previously returned the bare string even though the
        // declared return type is string[]; callers then iterated it with
        // for..of, producing one "suggestion" per character. Wrap it.
        const content = codeCompletions.content ?? "";
        return content.trim() === "" ? [] : [content];
    }
    if (Array.isArray(codeCompletions) && codeCompletions.length > 0) {
        // Set preserves insertion order, so duplicates collapse to the
        // first occurrence.
        const unique = new Set<string>();
        for (const compl of codeCompletions) {
            unique.add(compl.content ?? "");
        }
        return Array.from(unique);
    }
    return [];
}

// NOTE(review): leftover comment — no Fibonacci-printing function exists in this class; consider removing.

}
2 changes: 2 additions & 0 deletions src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ export class Configuration {
chats_max_tokens = 64000;
chats_summarize_old_msgs = false;
chats_msgs_keep = 50
max_parallel_completions = 3
completion_models_list = new Array();
embeddings_models_list = new Array();
tools_models_list = new Array();
Expand Down Expand Up @@ -239,6 +240,7 @@ export class Configuration {
this.tools_log_calls = Boolean(config.get<boolean>("tools_log_calls"));
this.chats_max_history = Number(config.get<number>("chats_max_history"));
this.chats_max_tokens = Number(config.get<number>("chats_max_tokens"));
this.max_parallel_completions = Number(config.get<number>("max_parallel_completions"));
this.chats_summarize_old_msgs = Boolean(config.get<boolean>("chats_summarize_old_msgs"));
this.chats_msgs_keep = Number(config.get<number>("chats_msgs_keep"));
this.skills_folder = String(config.get<string>("skills_folder"));
Expand Down
2 changes: 2 additions & 0 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ export function activate(context: vscode.ExtensionContext) {
app.architect.registerGenarateCommitMsg(context)
app.architect.registerCommandKillAgent(context)
app.architect.registerWebviewProvider(context)
app.architect.registerCommandSelectNextSuggestion(context)
app.architect.registerCommandSelectPreviousSuggestion(context)
app.architect.init()
}

Expand Down
5 changes: 3 additions & 2 deletions src/llama-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ export class LlamaServer {
input_suffix: inputSuffix,
input_extra: chunks,
prompt,
n_predict: 0,
n_predict: 0,
samplers: [],
cache_prompt: true,
t_max_prompt_ms: this.app.configuration.t_max_prompt_ms,
Expand All @@ -125,9 +125,10 @@ export class LlamaServer {
return {
input_prefix: inputPrefix,
input_suffix: inputSuffix,
input_extra: chunks,
input_extra: chunks,
prompt,
n_predict: this.app.configuration.n_predict,
n_cmpl: this.app.configuration.max_parallel_completions,
...this.defaultRequestParams,
...(nindent && { n_indent: nindent }),
t_max_prompt_ms: this.app.configuration.t_max_prompt_ms,
Expand Down
6 changes: 3 additions & 3 deletions src/lru-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as crypto from 'crypto';

export class LRUCache {
private capacity: number;
private map: Map<string, string>;
private map: Map<string, string[]>;

constructor(capacity: number) {
if (capacity <= 0) {
Expand All @@ -18,7 +18,7 @@ export class LRUCache {
* @param key The key to retrieve.
* @returns The value associated with the key, or undefined if the key is not found.
*/
get = (key: string): string | undefined => {
get = (key: string): string[] | undefined => {
if (!this.map.has(key)) {
return undefined;
}
Expand All @@ -37,7 +37,7 @@ export class LRUCache {
* @param key The key to insert or update.
* @param value The value to associate with the key.
*/
put = (key: string, value: string): void => {
put = (key: string, value: string[]): void => {
if (this.map.has(key)) {
// If the key exists, delete it to refresh its position
this.map.delete(key);
Expand Down