Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,16 @@
"key": "ctrl+x",
"when": "editorTextFocus"
},
{
"command": "extension.selectNextSuggestion",
"key": "alt+]",
"when": "editorTextFocus && inlineSuggestionVisible"
},
{
"command": "extension.selectPreviousSuggestion",
"key": "alt+[",
"when": "editorTextFocus && inlineSuggestionVisible"
},
{
"command": "extension.acceptFirstLine",
"key": "shift+tab",
Expand Down Expand Up @@ -271,6 +281,11 @@
"default": "",
"description": "The URL to be used by the extension for creating embeddings."
},
"llama-vscode.max_parallel_completions": {
"type": "number",
"default": 3,
"description": "The max number of parallel completions. Switching between completions could be done with Alt+] (next) or Alt =+[ (previous). "
},
"llama-vscode.new_completion_model_port": {
"type": "number",
"default": 8012,
Expand Down
203 changes: 89 additions & 114 deletions resources/help.md

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions src/architect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,36 @@ export class Architect {
context.subscriptions.push(changeActiveTextEditorDisp)
}

registerCommandSelectNextSuggestion = (context: vscode.ExtensionContext) => {
    // Registers the command that cycles forward through the parallel inline
    // suggestions (bound to Alt+] in package.json).
    const disposable = vscode.commands.registerCommand(
        'extension.selectNextSuggestion',
        async () => {
            // Only meaningful while a text editor is focused.
            if (!vscode.window.activeTextEditor) return;
            // Let VS Code flip to the next inline suggestion, then advance our
            // own index so accept-word / accept-line act on the visible one.
            await vscode.commands.executeCommand('editor.action.inlineSuggest.showNext');
            await this.app.completion.increaseSuggestionIndex();
        }
    );
    context.subscriptions.push(disposable);
}

registerCommandSelectPreviousSuggestion = (context: vscode.ExtensionContext) => {
    // Registers the command that cycles backward through the parallel inline
    // suggestions (bound to Alt+[ in package.json).
    const disposable = vscode.commands.registerCommand(
        'extension.selectPreviousSuggestion',
        async () => {
            // Only meaningful while a text editor is focused.
            if (!vscode.window.activeTextEditor) return;
            // Let VS Code flip to the previous inline suggestion, then rewind
            // our own index so accept-word / accept-line act on the visible one.
            await vscode.commands.executeCommand('editor.action.inlineSuggest.showPrevious');
            await this.app.completion.decreaseSuggestionIndex();
        }
    );
    context.subscriptions.push(disposable);
}

registerCommandAcceptFirstLine = (context: vscode.ExtensionContext) => {
const acceptFirstLineCommand = vscode.commands.registerCommand(
'extension.acceptFirstLine',
Expand Down
138 changes: 99 additions & 39 deletions src/completion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ import vscode from "vscode";
import {Utils} from "./utils";

/**
 * Snapshot of the most recent inline-completion round.
 *
 * completions — every candidate suggestion returned for the position.
 * complIndex  — index of the candidate currently displayed; moved by
 *               increaseSuggestionIndex / decreaseSuggestionIndex when the
 *               user cycles suggestions with Alt+] / Alt+[.
 * position / inputPrefix / inputSuffix / prompt — the request context the
 *               candidates were generated for.
 */
interface CompletionDetails {
    completions: string[];
    position: vscode.Position;
    inputPrefix: string;
    inputSuffix: string;
    prompt: string;
    complIndex: number;
}

export class Completion {
private app: Application
private isRequestInProgress = false
isForcedNewRequest = false
lastCompletion: CompletionDetails = {completion: "", position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""};
lastCompletion: CompletionDetails = {completions: [], complIndex: 0, position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""};

constructor(application: Application) {
this.app = application;
Expand Down Expand Up @@ -66,8 +67,8 @@ export class Completion {
try {
let data: LlamaResponse | undefined
let hashKey = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + prompt)
let completion = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt)
let isCachedResponse = !this.isForcedNewRequest && completion != undefined
let completions = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt)
let isCachedResponse = !this.isForcedNewRequest && completions != undefined
if (!isCachedResponse) {
this.isForcedNewRequest = false
if (token.isCancellationRequested){
Expand All @@ -78,46 +79,55 @@ export class Completion {
this.app.statusbar.showThinkingInfo();

data = await this.app.llamaServer.getFIMCompletion(inputPrefix, inputSuffix, prompt, this.app.extraContext.chunks, nindent)
if (data != undefined) completion = data.content;
else completion = undefined
if (data != undefined) completions = this.getComplFromContent(data);
else completions = undefined
}
if (completion == undefined || completion.trim() == ""){
if (completions == undefined || completions.length == 0){
this.app.statusbar.showInfo(undefined);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "NO_SUGGESTION_RETURN", "")
return [];
}

let suggestionLines = completion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);

if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) {
let newCompletions: string[] = []
let firstComplLines: string[] = []
for (let compl of completions){
let suggestionLines = compl.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);

if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) {
continue
} else {
compl = this.updateSuggestion(suggestionLines, lineSuffix);
newCompletions.push(compl);
if (firstComplLines.length == 0) firstComplLines = suggestionLines;
}
}
if (newCompletions.length == 0){
this.app.statusbar.showInfo(undefined);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "")
return [];
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "")
return [];
}

completion = this.updateSuggestion(suggestionLines, lineSuffix);

if (!isCachedResponse) this.app.lruResultCache.put(hashKey, completion)
this.lastCompletion = this.getCompletionDetails(completion, position, inputPrefix, inputSuffix, prompt);
if (!isCachedResponse && newCompletions) this.app.lruResultCache.put(hashKey, newCompletions)
this.lastCompletion = this.getCompletionDetails(newCompletions, position, inputPrefix, inputSuffix, prompt);

// Run async as not needed for the suggestion
setTimeout(async () => {
if (isCachedResponse) this.app.statusbar.showCachedInfo()
else this.app.statusbar.showInfo(data);
if (!token.isCancellationRequested && lineSuffix.trim() === ""){
await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines);
await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines);
await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, firstComplLines);
await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, firstComplLines);
}
if (!token.isCancellationRequested){
this.app.extraContext.addFimContextChunks(position, context, document);
}
}, 0);
this.isRequestInProgress = false
this.app.logger.addEventLog(group, "NORMAL_RETURN", suggestionLines[0])
return [this.getCompletion(this.removeLeadingSpaces(completion, spacesToRemove), position)];
this.app.logger.addEventLog(group, "NORMAL_RETURN", firstComplLines[0])
return this.getCompletion(newCompletions||[], position, spacesToRemove);
} catch (err) {
console.error("Error fetching llama completion:", err);
vscode.window.showInformationMessage(this.app.configuration.getUiText(`Error getting response. Please check if llama.cpp server is running.`)??"");
Expand Down Expand Up @@ -155,21 +165,36 @@ export class Completion {
let promptCut = prompt.slice(i)
let hash = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + newPrompt)
let result = this.app.lruResultCache.get(hash)
if (result != undefined && promptCut == result.slice(0,promptCut.length)) return result.slice(prompt.length - newPrompt.length)
if (result == undefined) continue
let completions: string[] = []
for (const compl of result){
if (compl && promptCut == compl.slice(0,promptCut.length)) {
completions.push(compl.slice(prompt.length - newPrompt.length))
}
}
if (completions.length > 0) return completions;
}

return undefined
}

getCompletion = (completion: string, position: vscode.Position) => {
return new vscode.InlineCompletionItem(
completion,
new vscode.Range(position, position)
);
getCompletion = (completions: string[],
position: vscode.Position,
spacesToRemove: number): vscode.InlineCompletionItem[] => {
let completionItems: vscode.InlineCompletionItem[] = []
for (const completion of completions){
const compl: vscode.InlineCompletionItem = new vscode.InlineCompletionItem(
this.removeLeadingSpaces(completion, spacesToRemove),
new vscode.Range(position, position)
)
completionItems.push(compl);
}

return completionItems;
}

// Builds a fresh CompletionDetails snapshot for the given request context.
// The displayed candidate always starts at index 0 until the user cycles
// with Alt+] / Alt+[.
private getCompletionDetails = (completions: string[], position: vscode.Position, inputPrefix: string, inputSuffix: string, prompt: string) => {
    return { completions, complIndex: 0, position, inputPrefix, inputSuffix, prompt };
}

// logic for discarding predictions that repeat existing text
Expand Down Expand Up @@ -241,14 +266,17 @@ export class Completion {
let cached_completion = this.app.lruResultCache.get(futureHashKey)
if (cached_completion != undefined) return;
let futureData = await this.app.llamaServer.getFIMCompletion(futureInputPrefix, futureInputSuffix, futurePrompt, this.app.extraContext.chunks, prompt.length - prompt.trimStart().length);
let futureSuggestion = "";
let futureSuggestions = [];
if (futureData != undefined && futureData.content != undefined && futureData.content.trim() != "") {
futureSuggestion = futureData.content;
let suggestionLines = futureSuggestion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);
futureSuggestion = suggestionLines.join('\n')
let suggestions = this.getComplFromContent(futureData);
for (let futureSuggestion of suggestions||[]){
let suggestionLines = futureSuggestion.split(/\r?\n/)
Utils.removeTrailingNewLines(suggestionLines);
futureSuggestion = suggestionLines.join('\n')
futureSuggestions.push(futureSuggestion)
}
let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt);
this.app.lruResultCache.put(futureHashKey, futureSuggestion);
this.app.lruResultCache.put(futureHashKey, futureSuggestions);
}
}

Expand All @@ -262,13 +290,13 @@ export class Completion {
let futureSuggestion = suggestionLines.slice(1).join('\n')
let cached_completion = this.app.lruResultCache.get(futureHashKey)
if (cached_completion != undefined) return;
else this.app.lruResultCache.put(futureHashKey, futureSuggestion)
else this.app.lruResultCache.put(futureHashKey, [futureSuggestion])
}
}

insertNextWord = async (editor: vscode.TextEditor) => {
// Retrieve the last inline completion item
const lastSuggestion = this.lastCompletion.completion;
const lastSuggestion = this.lastCompletion.completions[this.lastCompletion.complIndex];
if (!lastSuggestion) {
return;
}
Expand All @@ -294,7 +322,7 @@ export class Completion {

insertFirstLine = async (editor: vscode.TextEditor) => {
// Retrieve the last inline completion item
const lastItem = this.lastCompletion.completion;
const lastItem = this.lastCompletion.completions[this.lastCompletion.complIndex];
if (!lastItem) {
return;
}
Expand All @@ -311,4 +339,36 @@ export class Completion {
editBuilder.insert(position, insertLine);
});
}

// Advances to the next cached suggestion, wrapping back to the first after
// the last. No-op when there are no cached completions.
increaseSuggestionIndex = async () => {
    const total = this.lastCompletion.completions.length;
    if (total === 0) return;
    this.lastCompletion.complIndex = (this.lastCompletion.complIndex + 1) % total;
}

// Steps back to the previous cached suggestion, wrapping to the last one
// when stepping back from the first. No-op when there are no completions.
decreaseSuggestionIndex = async () => {
    const total = this.lastCompletion.completions.length;
    if (total === 0) return;
    this.lastCompletion.complIndex = (this.lastCompletion.complIndex - 1 + total) % total;
}

/**
 * Normalizes a llama.cpp completion response into a list of suggestion
 * strings.
 * - Single-completion responses ({ content: string }) yield a one-element
 *   array, or [] when the content is missing/blank.
 * - Multi-completion responses (array of { content }) are de-duplicated
 *   while preserving first-seen order.
 */
private getComplFromContent(codeCompletions: any): string[] | undefined {
    if (codeCompletions != undefined && "content" in codeCompletions) {
        // BUG FIX: previously returned the bare string even though the
        // declared return type is string[]; callers then iterated it with
        // for..of, producing one "suggestion" per character. Wrap it.
        const content = codeCompletions.content ?? "";
        return content.trim() === "" ? [] : [content];
    }
    if (Array.isArray(codeCompletions) && codeCompletions.length > 0) {
        // Set preserves insertion order, so duplicates collapse to the
        // first occurrence.
        const unique = new Set<string>();
        for (const compl of codeCompletions) {
            unique.add(compl.content ?? "");
        }
        return Array.from(unique);
    }
    return [];
}

// NOTE(review): leftover comment — no Fibonacci-printing function exists in this class; consider removing.

}
2 changes: 2 additions & 0 deletions src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ export class Configuration {
chats_max_tokens = 64000;
chats_summarize_old_msgs = false;
chats_msgs_keep = 50
max_parallel_completions = 3
completion_models_list = new Array();
embeddings_models_list = new Array();
tools_models_list = new Array();
Expand Down Expand Up @@ -239,6 +240,7 @@ export class Configuration {
this.tools_log_calls = Boolean(config.get<boolean>("tools_log_calls"));
this.chats_max_history = Number(config.get<number>("chats_max_history"));
this.chats_max_tokens = Number(config.get<number>("chats_max_tokens"));
this.max_parallel_completions = Number(config.get<number>("max_parallel_completions"));
this.chats_summarize_old_msgs = Boolean(config.get<boolean>("chats_summarize_old_msgs"));
this.chats_msgs_keep = Number(config.get<number>("chats_msgs_keep"));
this.skills_folder = String(config.get<string>("skills_folder"));
Expand Down
2 changes: 2 additions & 0 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ export function activate(context: vscode.ExtensionContext) {
app.architect.registerGenarateCommitMsg(context)
app.architect.registerCommandKillAgent(context)
app.architect.registerWebviewProvider(context)
app.architect.registerCommandSelectNextSuggestion(context)
app.architect.registerCommandSelectPreviousSuggestion(context)
app.architect.init()
}

Expand Down
5 changes: 3 additions & 2 deletions src/llama-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ export class LlamaServer {
input_suffix: inputSuffix,
input_extra: chunks,
prompt,
n_predict: 0,
n_predict: 0,
samplers: [],
cache_prompt: true,
t_max_prompt_ms: this.app.configuration.t_max_prompt_ms,
Expand All @@ -125,9 +125,10 @@ export class LlamaServer {
return {
input_prefix: inputPrefix,
input_suffix: inputSuffix,
input_extra: chunks,
input_extra: chunks,
prompt,
n_predict: this.app.configuration.n_predict,
n_cmpl: this.app.configuration.max_parallel_completions,
...this.defaultRequestParams,
...(nindent && { n_indent: nindent }),
t_max_prompt_ms: this.app.configuration.t_max_prompt_ms,
Expand Down
6 changes: 3 additions & 3 deletions src/lru-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as crypto from 'crypto';

export class LRUCache {
private capacity: number;
private map: Map<string, string>;
private map: Map<string, string[]>;

constructor(capacity: number) {
if (capacity <= 0) {
Expand All @@ -18,7 +18,7 @@ export class LRUCache {
* @param key The key to retrieve.
* @returns The value associated with the key, or undefined if the key is not found.
*/
get = (key: string): string | undefined => {
get = (key: string): string[] | undefined => {
if (!this.map.has(key)) {
return undefined;
}
Expand All @@ -37,7 +37,7 @@ export class LRUCache {
* @param key The key to insert or update.
* @param value The value to associate with the key.
*/
put = (key: string, value: string): void => {
put = (key: string, value: string[]): void => {
if (this.map.has(key)) {
// If the key exists, delete it to refresh its position
this.map.delete(key);
Expand Down