From 30a5f99241baa96e790cb3dca88a5ec4ff9a96d7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 16:06:40 +0200 Subject: [PATCH 1/8] inst : initial --- autoload/llama.vim | 376 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 340 insertions(+), 36 deletions(-) diff --git a/autoload/llama.vim b/autoload/llama.vim index 08f9151..c1d7316 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -1,13 +1,25 @@ " vim: ts=4 sts=4 expandtab " colors (adjust to your liking) -highlight default llama_hl_hint guifg=#ff772f ctermfg=202 -highlight default llama_hl_info guifg=#77ff2f ctermfg=119 + +" fim colors +highlight default llama_hl_fim_hint guifg=#ff772f ctermfg=202 +highlight default llama_hl_fim_info guifg=#77ff2f ctermfg=119 + + " instruct colors for selected block + highlight default llama_hl_inst_src guibg=#333333 ctermbg=236 + + " virtual text colors for instructions + highlight default llama_hl_inst_virt_proc guifg=#77ff2f ctermfg=119 + highlight default llama_hl_inst_virt_gen guifg=#77ff2f ctermfg=119 + highlight default llama_hl_inst_virt_ready guifg=#ff772f ctermfg=202 " general parameters: " -" endpoint: llama.cpp server endpoint +" endpoint_fim: llama.cpp server endpoint for FIM completion +" endpoint_inst: llama.cpp server endpoint for instruction completion +" model_fim: model name in case when multiple models are loaded (optional) +" model_inst: instruction model name (optional) " api_key: llama.cpp server api key (optional) -" model: model name in case when multiple models are loaded (optional) " n_prefix: number of lines before the cursor location to include in the local prefix " n_suffix: number of lines after the cursor location to include in the local suffix " n_predict: max number of tokens to predict @@ -44,11 +56,14 @@ highlight default llama_hl_info guifg=#77ff2f ctermfg=119 " keymap_fim_accept_line: keymap to accept line suggestion, default: " keymap_fim_accept_word: keymap to accept word suggestion, default: " 
keymap_debug_toggle: keymap to toggle the debug pane, default: null +" keymap_inst_trigger: keymap to trigger the instruction command, default: null " let s:default_config = { - \ 'endpoint': 'http://127.0.0.1:8012/infill', + \ 'endpoint_fim': 'http://127.0.0.1:8012/infill', + \ 'endpoint_inst': 'http://127.0.0.1:8012/v1/chat/completions', + \ 'model_fim': '', + \ 'model_inst': '', \ 'api_key': '', - \ 'model': '', \ 'n_prefix': 256, \ 'n_suffix': 64, \ 'n_predict': 128, @@ -68,13 +83,17 @@ let s:default_config = { \ 'keymap_fim_accept_line': "", \ 'keymap_fim_accept_word': "", \ 'keymap_debug_toggle': v:null, + \ 'keymap_inst_trigger': v:null, \ 'enable_at_startup': v:true, + \ 'timeout_inst': 30000, \ } let llama_config = get(g:, 'llama_config', s:default_config) " rename deprecated keys in `llama_config`. let s:renames = { + \ 'endpoint' : 'endpoint_fim', + \ 'model' : 'model_fim', \ 'keymap_trigger' : 'keymap_fim_trigger', \ 'keymap_accept_full' : 'keymap_fim_accept_full', \ 'keymap_accept_line' : 'keymap_fim_accept_line', @@ -170,6 +189,9 @@ function! llama#disable() exe "silent! iunmap " .. g:llama_config.keymap_fim_accept_word exe "silent! unmap " .. g:llama_config.keymap_debug_toggle + exe "silent! vunmap " .. g:llama_config.keymap_inst_trigger + exe "silent! unmap " + exe "silent! unmap " let s:llama_enabled = v:false @@ -200,6 +222,8 @@ function! llama#setup() command! LlamaToggle call llama#toggle() command! LlamaToggleAutoFim call llama#toggle_auto_fim() + command! -range=% LlamaInstruct call llama#inst(, ) + call llama#debug_setup() endfunction @@ -221,14 +245,17 @@ function! 
llama#init() let s:ring_queued = [] " chunks that are queued to be sent for processing let s:ring_n_evict = 0 - let s:hint_shown = v:false + let s:fim_hint_shown = v:false let s:pos_y_pick = -9999 " last y where we picked a chunk let s:indent_last = -1 " last indentation level that was accepted (TODO: this might be buggy) let s:timer_fim = -1 let s:t_last_move = reltime() " last time the cursor moved - let s:current_job = v:null + let s:current_job_fim = v:null + + let s:inst_requests = [] + let s:inst_request_id = 0 let s:ghost_text_nvim = exists('*nvim_buf_get_mark') let s:ghost_text_vim = has('textprop') @@ -237,8 +264,8 @@ function! llama#init() if version < 901 echom 'Warning: llama.vim requires version 901 or greater. Current version: ' . version endif - let s:hlgroup_hint = 'llama_hl_hint' - let s:hlgroup_info = 'llama_hl_info' + let s:hlgroup_hint = 'llama_hl_fim_hint' + let s:hlgroup_info = 'llama_hl_fim_info' if empty(prop_type_get(s:hlgroup_hint)) call prop_type_add(s:hlgroup_hint, {'highlight': s:hlgroup_hint}) @@ -288,6 +315,10 @@ function! llama#enable() " setup keymaps exe "autocmd InsertEnter * inoremap " . g:llama_config.keymap_fim_trigger . " llama#fim_inline(v:false, v:false)" exe "nnoremap " .. g:llama_config.keymap_debug_toggle .. " :call llama#debug_toggle()" + exe "vnoremap " .. g:llama_config.keymap_inst_trigger .. " :LlamaInstruct" + + exe "nnoremap :call llama#inst_accept()" + exe "nnoremap :call llama#inst_cancel()" call llama#setup_autocmds() @@ -466,13 +497,13 @@ function! 
s:ring_update() \ "--silent", \ "--no-buffer", \ "--request", "POST", - \ "--url", g:llama_config.endpoint, + \ "--url", g:llama_config.endpoint_fim, \ "--header", "Content-Type: application/json", \ "--data", "@-", \ ] - if exists ("g:llama_config.model") && len("g:llama_config.model") > 0 - let l:request['model'] = g:llama_config.model + if exists ("g:llama_config.model_fim") && len("g:llama_config.model_fim") > 0 + let l:request['model'] = g:llama_config.model_fim end if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0 @@ -493,6 +524,10 @@ function! s:ring_update() endif endfunction +" ===================================== +" Fill-in-Middle (FIM) completion +" ===================================== + " get the local context at a specified position " a:prev can optionally contain a previous completion for this position " in such cases, create the local context as if the completion was already inserted @@ -578,7 +613,7 @@ function! llama#fim_inline(is_auto, use_cache) abort endif " we already have a suggestion displayed - hide it - if s:hint_shown && !a:is_auto + if s:fim_hint_shown && !a:is_auto call llama#fim_hide() return '' endif @@ -603,7 +638,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort endif " avoid sending repeated requests too fast - if s:current_job != v:null + if s:current_job_fim != v:null if s:timer_fim != -1 call timer_stop(s:timer_fim) let s:timer_fim = -1 @@ -613,7 +648,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort return endif - "if s:hint_shown && empty(a:prev) + "if s:fim_hint_shown && empty(a:prev) " return "endif @@ -729,44 +764,44 @@ function! 
llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort \ "--silent", \ "--no-buffer", \ "--request", "POST", - \ "--url", g:llama_config.endpoint, + \ "--url", g:llama_config.endpoint_fim, \ "--header", "Content-Type: application/json", \ "--data", "@-", \ ] - if exists ("g:llama_config.model") && len("g:llama_config.model") > 0 - let l:request['model'] = g:llama_config.model + if exists ("g:llama_config.model_fim") && len("g:llama_config.model_fim") > 0 + let l:request['model'] = g:llama_config.model_fim end if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0 call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key]) endif - if s:current_job != v:null + if s:current_job_fim != v:null if s:ghost_text_nvim - call jobstop(s:current_job) + call jobstop(s:current_job_fim) elseif s:ghost_text_vim - call job_stop(s:current_job) + call job_stop(s:current_job_fim) endif endif " send the request asynchronously let l:request_json = json_encode(l:request) if s:ghost_text_nvim - let s:current_job = jobstart(l:curl_command, { + let s:current_job_fim = jobstart(l:curl_command, { \ 'on_stdout': function('s:fim_on_response', [l:hashes]), \ 'on_exit': function('s:fim_on_exit'), \ 'stdout_buffered': v:true \ }) - call chansend(s:current_job, l:request_json) - call chanclose(s:current_job, 'stdin') + call chansend(s:current_job_fim, l:request_json) + call chanclose(s:current_job_fim, 'stdin') elseif s:ghost_text_vim - let s:current_job = job_start(l:curl_command, { + let s:current_job_fim = job_start(l:curl_command, { \ 'out_cb': function('s:fim_on_response', [l:hashes]), \ 'exit_cb': function('s:fim_on_exit') \ }) - let channel = job_getchannel(s:current_job) + let channel = job_getchannel(s:current_job_fim) call ch_sendraw(channel, l:request_json) call ch_close_in(channel) endif @@ -819,7 +854,7 @@ function! 
s:fim_on_response(hashes, job_id, data, event = v:null) endfor " if nothing is currently displayed - show the hint directly - if !s:hint_shown || !s:fim_data['can_accept'] + if !s:fim_hint_shown || !s:fim_data['can_accept'] " log only non-speculative fims for now call llama#debug_log('fim_on_response', get(json_decode(l:raw), 'content', '')) @@ -835,7 +870,7 @@ function! s:fim_on_exit(job_id, exit_code, event = v:null) echom "Job failed with exit code: " . a:exit_code endif - let s:current_job = v:null + let s:current_job_fim = v:null endfunction function! s:on_move() @@ -910,7 +945,7 @@ function! s:fim_try_hint(pos_x, pos_y) call s:fim_render(l:pos_x, l:pos_y, l:raw) " run async speculative FIM in the background for this position - if s:hint_shown + if s:fim_hint_shown call llama#fim(l:pos_x, l:pos_y, v:true, s:fim_data['content'], v:true) endif endif @@ -1090,12 +1125,12 @@ function! s:fim_render(pos_x, pos_y, data) " display the suggestion and append the info to the end of the first line if s:ghost_text_nvim call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, l:pos_y - 1, l:pos_x, { - \ 'virt_text': [[l:content[0], 'llama_hl_hint'], [l:info, 'llama_hl_info']], + \ 'virt_text': [[l:content[0], 'llama_hl_fim_hint'], [l:info, 'llama_hl_fim_info']], \ 'virt_text_pos': l:content == [""] ? 'eol' : 'overlay' \ }) call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, l:pos_y - 1, 0, { - \ 'virt_lines': map(l:content[1:], {idx, val -> [[val, 'llama_hl_hint']]}) + \ 'virt_lines': map(l:content[1:], {idx, val -> [[val, 'llama_hl_fim_hint']]}) \ }) elseif s:ghost_text_vim let l:full_suffix = l:content[0] @@ -1128,7 +1163,7 @@ function! s:fim_render(pos_x, pos_y, data) exe 'inoremap ' . g:llama_config.keymap_fim_accept_line . ' :call llama#fim_accept(''line'')' exe 'inoremap ' . g:llama_config.keymap_fim_accept_word . 
' :call llama#fim_accept(''word'')' - let s:hint_shown = v:true + let s:fim_hint_shown = v:true let s:fim_data['pos_x'] = l:pos_x let s:fim_data['pos_y'] = l:pos_y @@ -1189,7 +1224,7 @@ function! llama#fim_accept(accept_type) endfunction function! llama#fim_hide() - let s:hint_shown = v:false + let s:fim_hint_shown = v:false " clear the virtual text let l:bufnr = bufnr('%') @@ -1208,10 +1243,279 @@ function! llama#fim_hide() exe 'silent! iunmap ' . g:llama_config.keymap_fim_accept_word endfunction -function! llama#is_hint_shown() - return s:hint_shown +" ===================================== +" Instruct-based editing +" ===================================== + +function! llama#inst(start, end) + let l:lines = getline(a:start, a:end) + let l:inst = input('Instruction: ') + if empty(l:inst) + return + endif + call llama#inst_send(a:start, a:end, l:lines, l:inst, function('s:inst_callback', [a:start, a:end])) endfunction +function! llama#inst_send(start, end, lines, inst, callback) + " Create request state + let l:request_id = s:inst_request_id + let s:inst_request_id += 1 + + let l:end = min([a:end, line('$')]) + + let l:req = { + \ 'id': l:request_id, + \ 'range': [a:start, l:end], + \ 'status': 'proc', + \ 'result': '', + \ 'instruction': a:inst, + \ 'job': v:null, + \ 'extmark': -1, + \ 'extmark_virt': -1, + \ } + + call add(s:inst_requests, l:req) + + " highlights the selected text + let l:bufnr = bufnr('%') + if s:ghost_text_nvim + let l:ns = nvim_create_namespace('vt_inst') + let l:req.extmark = nvim_buf_set_extmark(l:bufnr, l:ns, a:start - 1, 0, { + \ 'end_row': l:end - 1, + \ 'end_col': len(getline(l:end)), + \ 'hl_group': 'llama_hl_inst_src' + \ }) + elseif s:ghost_text_vim + " TODO: implement classic Vim support + endif + + " Initialize virtual text with processing status + call s:inst_update(l:request_id, 'proc') + + " Build the payload + let l:system_message = {'role': 'system', 'content': 'You are a code-editing assistant. 
Return ONLY the result after applying the instruction to the selection.'} + + " extra context + " TODO: deduplicate + let l:extra_context = [] + for l:chunk in s:ring_chunks + call add(l:extra_context, { + \ 'text': l:chunk.str, + \ 'time': l:chunk.time, + \ 'filename': l:chunk.filename + \ }) + endfor + + let l:user_content = "" + let l:user_content .= "--- context ----------------------------------------------------\n" + let l:user_content .= join(l:extra_context, "\n") . "\n" + let l:user_content .= "--- instruction ------------------------------------------------\n" + let l:user_content .= a:inst . "\n" + let l:user_content .= "--- selection --------------------------------------------------\n" + let l:user_content .= join(a:lines, "\n") . "\n" + let l:user_content .= "--- result -----------------------------------------------------\n" + + let l:user_message = {'role': 'user', 'content': l:user_content} + + let l:messages = [l:system_message, l:user_message] + + let l:request = { + \ 'model': g:llama_config.model_inst, + \ 'messages': l:messages, + \ 'temperature': 0.0, + \ 'stream': v:false, + \ } + + call llama#debug_log('inst_send | ' . a:inst, l:user_content) + + let l:curl_command = [ + \ "curl", + \ "--silent", + \ "--no-buffer", + \ "--request", "POST", + \ "--url", g:llama_config.endpoint_inst, + \ "--header", "Content-Type: application/json", + \ "--data", "@-", + \ ] + + if exists("g:llama_config.api_key") && len("g:llama_config.api_key") > 0 + call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. 
g:llama_config.api_key]) + endif + + let l:request_json = json_encode(l:request) + + if s:ghost_text_nvim + let l:req.job = jobstart(l:curl_command, { + \ 'on_stdout': function('s:inst_on_response', [l:request_id, a:callback]), + \ 'on_exit': function('s:inst_on_exit', [l:request_id, a:callback]), + \ 'stdout_buffered': v:true + \ }) + call chansend(l:req.job, l:request_json) + call chanclose(l:req.job, 'stdin') + elseif s:ghost_text_vim + let l:req.job = job_start(l:curl_command, { + \ 'out_cb': function('s:inst_on_response', [l:request_id, a:callback]), + \ 'exit_cb': function('s:inst_on_exit', [l:request_id, a:callback]) + \ }) + + let channel = job_getchannel(l:req.job) + call ch_sendraw(channel, l:request_json) + call ch_close_in(channel) + endif +endfunction + +function! s:inst_update(id, status) + for l:req in s:inst_requests + if l:req.id == a:id + let l:req.status = a:status + if s:ghost_text_nvim + let l:ns = nvim_create_namespace('vt_inst') + " Clear existing virtual text extmark if it exists + if l:req.extmark_virt != -1 + call nvim_buf_del_extmark(bufnr('%'), l:ns, l:req.extmark_virt) + let l:req.extmark_virt = -1 + endif + " Create virtual text extmark + let l:virt_lines = [] + let l:separator = '=====================================' + if a:status == 'ready' + let l:result_lines = split(l:req.result, "\n") + let l:virt_lines = [[[l:separator, 'llama_hl_inst_virt_ready']]] + map(l:result_lines, {idx, val -> [[val, 'llama_hl_inst_virt_ready']]}) + elseif a:status == 'proc' + let l:instruction_truncated = l:req.instruction + if len(l:instruction_truncated) > 64 + let l:instruction_truncated = l:instruction_truncated[:63] . '...' + endif + let l:virt_lines = [ + \ [[l:separator, 'llama_hl_inst_virt_proc']], + \ [[printf('(%s) Processing ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_proc']], + \ [['Instruction: ' . 
l:instruction_truncated, 'llama_hl_inst_virt_proc']] + \ ] + elseif a:status == 'gen' + let l:virt_lines = [ + \ [[l:separator, 'llama_hl_inst_virt_gen']], + \ [[printf('(%s) Generating ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_gen']] + \ ] + endif + + if !empty(l:virt_lines) + let l:req.extmark_virt = nvim_buf_set_extmark(bufnr('%'), l:ns, l:req.range[1] - 1, 0, { + \ 'virt_lines': l:virt_lines + \ }) + endif + elseif s:ghost_text_vim + " TODO: implement classic Vim support + endif + break + endif + endfor +endfunction + +function! s:inst_on_response(id, callback, job_id, data, event = v:null) + call s:inst_update(a:id, 'gen') + + if s:ghost_text_nvim + let l:raw = join(a:data, "\n") + elseif s:ghost_text_vim + let l:raw = a:data + endif + + if len(l:raw) == 0 + return + endif + + let l:content = '' + try + let l:response = json_decode(l:raw) + let l:content = get(l:response, 'choices', [{}])[0].message.content + catch + " Assume plain text response + let l:content = l:raw + endtry + + " Store result + for l:req in s:inst_requests + if l:req.id == a:id + let l:req.result = l:content + break + endif + endfor +endfunction + +function! s:inst_on_exit(id, callback, job_id, exit_code, event = v:null) + if a:exit_code != 0 + echohl ErrorMsg + echo "Instruct job failed with exit code: " . a:exit_code + echohl None + call s:inst_remove(a:id) + return + endif + + call s:inst_update(a:id, 'ready') +endfunction + +function! s:inst_remove(id) + for i in range(len(s:inst_requests)) + if s:inst_requests[i].id == a:id + " Clear extmarks + if s:ghost_text_nvim + let l:ns = nvim_create_namespace('vt_inst') + call nvim_buf_del_extmark(bufnr('%'), l:ns, s:inst_requests[i].extmark) + if s:inst_requests[i].extmark_virt != -1 + call nvim_buf_del_extmark(bufnr('%'), l:ns, s:inst_requests[i].extmark_virt) + endif + endif + call remove(s:inst_requests, i) + break + endif + endfor +endfunction + +function! 
s:inst_callback(start, end, result) + let l:result_lines = split(a:result, "\n", 1) + " Remove trailing empty lines + while len(l:result_lines) > 0 && l:result_lines[-1] == "" + call remove(l:result_lines, -1) + endwhile + + let l:num_result = len(l:result_lines) + let l:num_original = a:end - a:start + 1 + + " Delete the original range + call deletebufline(bufnr('%'), a:start, a:end) + + " Insert the new lines + call append(a:start - 1, l:result_lines) +endfunction + +function! llama#inst_accept() + let l:line = line('.') + for l:req in s:inst_requests + if l:req.status == 'ready' && l:line >= l:req.range[0] && l:line <= l:req.range[1] + call s:inst_callback(l:req.range[0], l:req.range[1], l:req.result) + call s:inst_remove(l:req.id) + return + endif + endfor + " If not in range, do normal Tab + call feedkeys("\", 'n') +endfunction + +function! llama#inst_cancel() + let l:line = line('.') + for l:req in s:inst_requests + if l:line >= l:req.range[0] && l:line <= l:req.range[1] + call s:inst_remove(l:req.id) + return + endif + endfor + " If not in range, do normal Esc (nothing) +endfunction + +" ===================================== +" Debug helpers +" ===================================== + function! llama#debug_log(msg, ...) 
abort return call('llama_debug#log', [a:msg] + a:000) endfunction From 3d7413205e9e925487dddd097b0b1e0d4598a676 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 18:27:53 +0200 Subject: [PATCH 2/8] cont : fix extmark position updates --- autoload/llama.vim | 145 ++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 56 deletions(-) diff --git a/autoload/llama.vim b/autoload/llama.vim index c1d7316..4e0f473 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -82,10 +82,12 @@ let s:default_config = { \ 'keymap_fim_accept_full': "", \ 'keymap_fim_accept_line': "", \ 'keymap_fim_accept_word': "", - \ 'keymap_debug_toggle': v:null, \ 'keymap_inst_trigger': v:null, + \ 'keymap_inst_accept': "", + \ 'keymap_inst_cancel': "", + \ 'keymap_debug_toggle': v:null, \ 'enable_at_startup': v:true, - \ 'timeout_inst': 30000, + \ 'timeout_inst': 60000, \ } let llama_config = get(g:, 'llama_config', s:default_config) @@ -190,8 +192,8 @@ function! llama#disable() exe "silent! unmap " .. g:llama_config.keymap_debug_toggle exe "silent! vunmap " .. g:llama_config.keymap_inst_trigger - exe "silent! unmap " - exe "silent! unmap " + exe "silent! unmap " .. g:llama_config.keymap_inst_accept + exe "silent! unmap " .. g:llama_config.keymap_inst_cancel let s:llama_enabled = v:false @@ -315,10 +317,10 @@ function! llama#enable() " setup keymaps exe "autocmd InsertEnter * inoremap " . g:llama_config.keymap_fim_trigger . " llama#fim_inline(v:false, v:false)" exe "nnoremap " .. g:llama_config.keymap_debug_toggle .. " :call llama#debug_toggle()" - exe "vnoremap " .. g:llama_config.keymap_inst_trigger .. " :LlamaInstruct" - exe "nnoremap :call llama#inst_accept()" - exe "nnoremap :call llama#inst_cancel()" + exe "vnoremap " .. g:llama_config.keymap_inst_trigger .. " :LlamaInstruct" + exe "nnoremap " .. g:llama_config.keymap_inst_accept .. " :call llama#inst_accept()" + exe "nnoremap " .. g:llama_config.keymap_inst_cancel .. 
" :call llama#inst_cancel()" call llama#setup_autocmds() @@ -484,8 +486,8 @@ function! s:ring_update() \ 'prompt': "", \ 'n_predict': 0, \ 'temperature': 0.0, - \ 'stream': v:false, \ 'samplers': [], + \ 'stream': v:false, \ 'cache_prompt': v:true, \ 't_max_prompt_ms': 1, \ 't_max_predict_ms': 1, @@ -739,8 +741,8 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort \ 'n_indent': l:indent, \ 'top_k': 40, \ 'top_p': 0.90, - \ 'stream': v:false, \ 'samplers': ["top_k", "top_p", "infill"], + \ 'stream': v:false, \ 'cache_prompt': v:true, \ 't_max_prompt_ms': g:llama_config.t_max_prompt_ms, \ 't_max_predict_ms': l:t_max_predict_ms, @@ -1253,6 +1255,7 @@ function! llama#inst(start, end) if empty(l:inst) return endif + call llama#inst_send(a:start, a:end, l:lines, l:inst, function('s:inst_callback', [a:start, a:end])) endfunction @@ -1268,7 +1271,7 @@ function! llama#inst_send(start, end, lines, inst, callback) \ 'range': [a:start, l:end], \ 'status': 'proc', \ 'result': '', - \ 'instruction': a:inst, + \ 'inst': a:inst, \ 'job': v:null, \ 'extmark': -1, \ 'extmark_virt': -1, @@ -1293,7 +1296,10 @@ function! llama#inst_send(start, end, lines, inst, callback) call s:inst_update(l:request_id, 'proc') " Build the payload - let l:system_message = {'role': 'system', 'content': 'You are a code-editing assistant. Return ONLY the result after applying the instruction to the selection.'} + let l:system_message = { + \ 'role': 'system', + \ 'content': 'You are a code-editing assistant. Return ONLY the result after applying the instruction to the selection.' + \ } " extra context " TODO: deduplicate @@ -1320,10 +1326,12 @@ function! 
llama#inst_send(start, end, lines, inst, callback) let l:messages = [l:system_message, l:user_message] let l:request = { - \ 'model': g:llama_config.model_inst, - \ 'messages': l:messages, - \ 'temperature': 0.0, - \ 'stream': v:false, + \ 'model': g:llama_config.model_inst, + \ 'messages': l:messages, + \ 'min_p': 0.1, + \ 'samplers': ["min_p"], + \ 'stream': v:false, + \ 'cache_prompt': v:true, \ } call llama#debug_log('inst_send | ' . a:inst, l:user_content) @@ -1364,45 +1372,63 @@ function! llama#inst_send(start, end, lines, inst, callback) endif endfunction +function! llama#inst_update_pos(req) + let l:bufnr = bufnr('%') + let l:ns = nvim_create_namespace('vt_inst') + + let l:extmark_pos = nvim_buf_get_extmark_by_id(l:bufnr, l:ns, a:req.extmark, {}) + if empty(l:extmark_pos) + continue + endif + + let l:extmark_line = l:extmark_pos[0] + 1 + let a:req.range[1] = l:extmark_line + a:req.range[1] - a:req.range[0] + let a:req.range[0] = l:extmark_line +endfunction + function! s:inst_update(id, status) for l:req in s:inst_requests if l:req.id == a:id let l:req.status = a:status - if s:ghost_text_nvim - let l:ns = nvim_create_namespace('vt_inst') - " Clear existing virtual text extmark if it exists - if l:req.extmark_virt != -1 - call nvim_buf_del_extmark(bufnr('%'), l:ns, l:req.extmark_virt) - let l:req.extmark_virt = -1 - endif - " Create virtual text extmark - let l:virt_lines = [] - let l:separator = '=====================================' - if a:status == 'ready' - let l:result_lines = split(l:req.result, "\n") - let l:virt_lines = [[[l:separator, 'llama_hl_inst_virt_ready']]] + map(l:result_lines, {idx, val -> [[val, 'llama_hl_inst_virt_ready']]}) - elseif a:status == 'proc' - let l:instruction_truncated = l:req.instruction - if len(l:instruction_truncated) > 64 - let l:instruction_truncated = l:instruction_truncated[:63] . '...' 
- endif - let l:virt_lines = [ - \ [[l:separator, 'llama_hl_inst_virt_proc']], - \ [[printf('(%s) Processing ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_proc']], - \ [['Instruction: ' . l:instruction_truncated, 'llama_hl_inst_virt_proc']] - \ ] - elseif a:status == 'gen' - let l:virt_lines = [ - \ [[l:separator, 'llama_hl_inst_virt_gen']], - \ [[printf('(%s) Generating ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_gen']] - \ ] - endif - - if !empty(l:virt_lines) - let l:req.extmark_virt = nvim_buf_set_extmark(bufnr('%'), l:ns, l:req.range[1] - 1, 0, { - \ 'virt_lines': l:virt_lines - \ }) - endif + call llama#inst_update_pos(l:req) + + if s:ghost_text_nvim + let l:ns = nvim_create_namespace('vt_inst') + + if l:req.extmark_virt != -1 + call nvim_buf_del_extmark(bufnr('%'), l:ns, l:req.extmark_virt) + let l:req.extmark_virt = -1 + endif + + let l:inst_trunc = l:req.inst + if len(l:inst_trunc) > 64 + let l:inst_trunc = l:inst_trunc[:63] . '...' + endif + + let l:virt_lines = [] + let l:separator = '=====================================' + if a:status == 'ready' + let l:result_lines = split(l:req.result, "\n") + let l:virt_lines = [[[l:separator, 'llama_hl_inst_virt_ready']]] + map(l:result_lines, {idx, val -> [[val, 'llama_hl_inst_virt_ready']]}) + elseif a:status == 'proc' + let l:virt_lines = [ + \ [[l:separator, 'llama_hl_inst_virt_proc']], + \ [[printf('(%s) Processing ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_proc']], + \ [['Instruction: ' . l:inst_trunc, 'llama_hl_inst_virt_proc']] + \ ] + elseif a:status == 'gen' + let l:virt_lines = [ + \ [[l:separator, 'llama_hl_inst_virt_gen']], + \ [[printf('(%s) Generating ...', g:llama_config.endpoint_inst), 'llama_hl_inst_virt_gen']], + \ [['Instruction: ' . 
l:inst_trunc, 'llama_hl_inst_virt_gen']] + \ ] + endif + + if !empty(l:virt_lines) + let l:req.extmark_virt = nvim_buf_set_extmark(bufnr('%'), l:ns, l:req.range[1] - 1, 0, { + \ 'virt_lines': l:virt_lines + \ }) + endif elseif s:ghost_text_vim " TODO: implement classic Vim support endif @@ -1457,7 +1483,6 @@ endfunction function! s:inst_remove(id) for i in range(len(s:inst_requests)) if s:inst_requests[i].id == a:id - " Clear extmarks if s:ghost_text_nvim let l:ns = nvim_create_namespace('vt_inst') call nvim_buf_del_extmark(bufnr('%'), l:ns, s:inst_requests[i].extmark) @@ -1473,6 +1498,7 @@ endfunction function! s:inst_callback(start, end, result) let l:result_lines = split(a:result, "\n", 1) + " Remove trailing empty lines while len(l:result_lines) > 0 && l:result_lines[-1] == "" call remove(l:result_lines, -1) @@ -1490,20 +1516,27 @@ endfunction function! llama#inst_accept() let l:line = line('.') + for l:req in s:inst_requests - if l:req.status == 'ready' && l:line >= l:req.range[0] && l:line <= l:req.range[1] - call s:inst_callback(l:req.range[0], l:req.range[1], l:req.result) - call s:inst_remove(l:req.id) - return + if l:req.status ==# 'ready' + call llama#inst_update_pos(l:req) + + if l:line >= l:req.range[0] && l:line <= l:req.range[1] + call s:inst_callback(l:req.range[0], l:req.range[1], l:req.result) + call s:inst_remove(l:req.id) + return + endif endif endfor - " If not in range, do normal Tab + call feedkeys("\", 'n') endfunction function! 
llama#inst_cancel() let l:line = line('.') for l:req in s:inst_requests + call llama#inst_update_pos(l:req) + if l:line >= l:req.range[0] && l:line <= l:req.range[1] call s:inst_remove(l:req.id) return From 2c887282398f55cdfa8c5e12a783aa75bfa4faab Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:02:13 +0200 Subject: [PATCH 3/8] cont : update docs --- README.md | 7 ++++++- autoload/llama.vim | 3 +-- doc/llama.txt | 19 ++++++++++++++----- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4e6ded8..ae24db7 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Local LLM-assisted text completion. - Toggle the suggestion manually by pressing `Ctrl+F` - Accept a suggestion with `Tab` - Accept the first line of a suggestion with `Shift+Tab` +- Instruction-based editing (instruct mode) with `Ctrl+I` - Control max text generation time - Configure scope of context around the cursor - Ring context with chunks from open and edited files and yanked text @@ -83,10 +84,14 @@ Examples: 4. Changing accept line keymap +5. Configure instruction-based editing trigger keymap + ```vim - let g:llama_config.keymap_accept_full = "" + let g:llama_config.keymap_inst_trigger = "" ``` +5. Configure instruction-based editing trigger keymap + Please refer to `:help llama_config` or the [source](./autoload/llama.vim) for the full list of options. 
diff --git a/autoload/llama.vim b/autoload/llama.vim index 4e0f473..30d4f4e 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -82,12 +82,11 @@ let s:default_config = { \ 'keymap_fim_accept_full': "", \ 'keymap_fim_accept_line': "", \ 'keymap_fim_accept_word': "", - \ 'keymap_inst_trigger': v:null, + \ 'keymap_inst_trigger': "", \ 'keymap_inst_accept': "", \ 'keymap_inst_cancel': "", \ 'keymap_debug_toggle': v:null, \ 'enable_at_startup': v:true, - \ 'timeout_inst': 60000, \ } let llama_config = get(g:, 'llama_config', s:default_config) diff --git a/doc/llama.txt b/doc/llama.txt index 09a0c30..a13abb1 100644 --- a/doc/llama.txt +++ b/doc/llama.txt @@ -20,6 +20,7 @@ Default Shortcut - Shift+Tab - accept just the first line of the suggestion - Ctrl+B - accept just the first word of the suggestion - Ctrl+F - trigger FIM completion manually +- Ctrl+I - trigger instruction-based editing (instruct mode) ================================================================================ Commands @@ -96,9 +97,11 @@ variable. 
Currently the default config is: >vim let s:default_config = { - \ 'endpoint': 'http://127.0.0.1:8012/infill', + \ 'endpoint_fim': 'http://127.0.0.1:8012/infill', + \ 'endpoint_inst': 'http://127.0.0.1:8012/v1/chat/completions', \ 'api_key': '', - \ 'model': '', + \ 'model_fim': '', + \ 'model_inst': '', \ 'n_prefix': 256, \ 'n_suffix': 64, \ 'n_predict': 128, @@ -117,17 +120,21 @@ Currently the default config is: \ 'keymap_fim_accept_full': "", \ 'keymap_fim_accept_line': "", \ 'keymap_fim_accept_word': "", + \ 'keymap_inst_trigger': v:null, \ 'keymap_debug_toggle': v:null, \ 'enable_at_startup': v:true, \ } < -- {endpoint} llama.cpp server endpoint +- {endpoint_fim} llama.cpp server endpoint for FIM completion + +- {endpoint_inst} llama.cpp server endpoint for instruction completion - {api_key} llama.cpp server api key (optional) -- {model} model name in case if multiple models are - loaded (optional) +- {model_fim} model name for FIM completion (optional) + +- {model_inst} model name for instruction completion (optional) - {n_prefix} number of lines before the cursor location to include in the local prefix @@ -184,6 +191,8 @@ keymaps parameters: - {keymap_fim_accept_word} keymap to accept word suggestion, default: +- {keymap_inst_trigger} keymap to trigger instruction-based editing, default: null + - {keymap_debug_toggle} keymap to toggle the debug pane, default: null Example: From 3574074d162ced4d76c0831f52cb35e2be594a17 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:10:17 +0200 Subject: [PATCH 4/8] cont : cleanup --- autoload/llama.vim | 44 ++++++++++++++++++++++++-------------------- doc/llama.txt | 3 +++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/autoload/llama.vim b/autoload/llama.vim index 30d4f4e..31867ce 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -1278,21 +1278,21 @@ function! 
llama#inst_send(start, end, lines, inst, callback) call add(s:inst_requests, l:req) - " highlights the selected text - let l:bufnr = bufnr('%') - if s:ghost_text_nvim - let l:ns = nvim_create_namespace('vt_inst') - let l:req.extmark = nvim_buf_set_extmark(l:bufnr, l:ns, a:start - 1, 0, { - \ 'end_row': l:end - 1, - \ 'end_col': len(getline(l:end)), - \ 'hl_group': 'llama_hl_inst_src' - \ }) - elseif s:ghost_text_vim - " TODO: implement classic Vim support - endif - - " Initialize virtual text with processing status - call s:inst_update(l:request_id, 'proc') + " highlights the selected text + let l:bufnr = bufnr('%') + if s:ghost_text_nvim + let l:ns = nvim_create_namespace('vt_inst') + let l:req.extmark = nvim_buf_set_extmark(l:bufnr, l:ns, a:start - 1, 0, { + \ 'end_row': l:end - 1, + \ 'end_col': len(getline(l:end)), + \ 'hl_group': 'llama_hl_inst_src' + \ }) + elseif s:ghost_text_vim + " TODO: implement classic Vim support + endif + + " Initialize virtual text with processing status + call s:inst_update(l:request_id, 'proc') " Build the payload let l:system_message = { @@ -1314,18 +1314,20 @@ function! llama#inst_send(start, end, lines, inst, callback) let l:user_content = "" let l:user_content .= "--- context ----------------------------------------------------\n" let l:user_content .= join(l:extra_context, "\n") . "\n" - let l:user_content .= "--- instruction ------------------------------------------------\n" - let l:user_content .= a:inst . "\n" let l:user_content .= "--- selection --------------------------------------------------\n" let l:user_content .= join(a:lines, "\n") . "\n" + let l:user_content .= "--- instruction ------------------------------------------------\n" + let l:user_content .= a:inst . "\n" let l:user_content .= "--- result -----------------------------------------------------\n" + call llama#debug_log('inst_send | ' . 
a:inst, l:user_content) + let l:user_message = {'role': 'user', 'content': l:user_content} let l:messages = [l:system_message, l:user_message] let l:request = { - \ 'model': g:llama_config.model_inst, + \ 'id_slot': 0, \ 'messages': l:messages, \ 'min_p': 0.1, \ 'samplers': ["min_p"], @@ -1333,8 +1335,6 @@ function! llama#inst_send(start, end, lines, inst, callback) \ 'cache_prompt': v:true, \ } - call llama#debug_log('inst_send | ' . a:inst, l:user_content) - let l:curl_command = [ \ "curl", \ "--silent", @@ -1345,6 +1345,10 @@ function! llama#inst_send(start, end, lines, inst, callback) \ "--data", "@-", \ ] + if exists("g:llama_config.model_inst") && len(g:llama_config.model_inst) > 0 + let l:request.model = g:llama_config.model_inst + endif + if exists("g:llama_config.api_key") && len("g:llama_config.api_key") > 0 call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key]) endif diff --git a/doc/llama.txt b/doc/llama.txt index a13abb1..2054421 100644 --- a/doc/llama.txt +++ b/doc/llama.txt @@ -45,6 +45,9 @@ Commands Toggle autofim for this vim/nvim session Equivalent to vimscript function: `llama#toggle_auto_fim()` +*:LlamaInstruct* + Trigger instruction-based editing (instruct mode) on selected text + *:LlamaDebugClear* Clear the debug pane logs. Equivalent to vimscript function: `debug#clear()` From 2563150c03f0eab805b14d0684803fed9820ae3a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:27:30 +0200 Subject: [PATCH 5/8] cont : fix bufnr --- autoload/llama.vim | 75 +++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/autoload/llama.vim b/autoload/llama.vim index 31867ce..e799c50 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -443,6 +443,20 @@ function! s:pick_chunk(text, no_mod, do_evict) "let &statusline = 'extra context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued) endfunction +function! 
s:ring_get_extra() + " extra context + let l:extra = [] + for l:chunk in s:ring_chunks + call add(l:extra, { + \ 'text': l:chunk.str, + \ 'time': l:chunk.time, + \ 'filename': l:chunk.filename + \ }) + endfor + + return l:extra +endfunction + " picks a queued chunk, sends it for processing and adds it to s:ring_chunks " called every g:llama_config.ring_update_ms function! s:ring_update() @@ -467,21 +481,14 @@ function! s:ring_update() "let &statusline = 'updated context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued) " send asynchronous job with the new extra context so that it is ready for the next FIM - let l:extra_context = [] - for l:chunk in s:ring_chunks - call add(l:extra_context, { - \ 'text': l:chunk.str, - \ 'time': l:chunk.time, - \ 'filename': l:chunk.filename - \ }) - endfor + let l:extra = s:ring_get_extra() " no samplers needed here let l:request = { \ 'id_slot': 0, \ 'input_prefix': "", \ 'input_suffix': "", - \ 'input_extra': l:extra_context, + \ 'input_extra': l:extra, \ 'prompt': "", \ 'n_predict': 0, \ 'temperature': 0.0, @@ -719,21 +726,13 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort " endif "endfor - " prepare the extra context data - let l:extra_ctx = [] - for l:chunk in s:ring_chunks - call add(l:extra_ctx, { - \ 'text': l:chunk.str, - \ 'time': l:chunk.time, - \ 'filename': l:chunk.filename - \ }) - endfor + let l:extra = s:ring_get_extra() let l:request = { \ 'id_slot': 0, \ 'input_prefix': l:prefix, \ 'input_suffix': l:suffix, - \ 'input_extra': l:extra_ctx, + \ 'input_extra': l:extra, \ 'prompt': l:middle, \ 'n_predict': g:llama_config.n_predict, \ 'stop': g:llama_config.stop_strings, @@ -1265,8 +1264,11 @@ function! llama#inst_send(start, end, lines, inst, callback) let l:end = min([a:end, line('$')]) + let l:bufnr = bufnr('%') + let l:req = { \ 'id': l:request_id, + \ 'bufnr': l:bufnr, \ 'range': [a:start, l:end], \ 'status': 'proc', \ 'result': '', @@ -1279,7 +1281,6 @@ function! 
llama#inst_send(start, end, lines, inst, callback) call add(s:inst_requests, l:req) " highlights the selected text - let l:bufnr = bufnr('%') if s:ghost_text_nvim let l:ns = nvim_create_namespace('vt_inst') let l:req.extmark = nvim_buf_set_extmark(l:bufnr, l:ns, a:start - 1, 0, { @@ -1300,20 +1301,11 @@ function! llama#inst_send(start, end, lines, inst, callback) \ 'content': 'You are a code-editing assistant. Return ONLY the result after applying the instruction to the selection.' \ } - " extra context - " TODO: deduplicate - let l:extra_context = [] - for l:chunk in s:ring_chunks - call add(l:extra_context, { - \ 'text': l:chunk.str, - \ 'time': l:chunk.time, - \ 'filename': l:chunk.filename - \ }) - endfor + let l:extra = s:ring_get_extra() let l:user_content = "" let l:user_content .= "--- context ----------------------------------------------------\n" - let l:user_content .= join(l:extra_context, "\n") . "\n" + let l:user_content .= join(l:extra, "\n") . "\n" let l:user_content .= "--- selection --------------------------------------------------\n" let l:user_content .= join(a:lines, "\n") . "\n" let l:user_content .= "--- instruction ------------------------------------------------\n" @@ -1376,7 +1368,7 @@ function! llama#inst_send(start, end, lines, inst, callback) endfunction function! llama#inst_update_pos(req) - let l:bufnr = bufnr('%') + let l:bufnr = a:req.bufnr let l:ns = nvim_create_namespace('vt_inst') let l:extmark_pos = nvim_buf_get_extmark_by_id(l:bufnr, l:ns, a:req.extmark, {}) @@ -1399,7 +1391,7 @@ function! s:inst_update(id, status) let l:ns = nvim_create_namespace('vt_inst') if l:req.extmark_virt != -1 - call nvim_buf_del_extmark(bufnr('%'), l:ns, l:req.extmark_virt) + call nvim_buf_del_extmark(l:req.bufnr, l:ns, l:req.extmark_virt) let l:req.extmark_virt = -1 endif @@ -1428,7 +1420,7 @@ function! 
s:inst_update(id, status) endif if !empty(l:virt_lines) - let l:req.extmark_virt = nvim_buf_set_extmark(bufnr('%'), l:ns, l:req.range[1] - 1, 0, { + let l:req.extmark_virt = nvim_buf_set_extmark(l:req.bufnr, l:ns, l:req.range[1] - 1, 0, { \ 'virt_lines': l:virt_lines \ }) endif @@ -1485,12 +1477,13 @@ endfunction function! s:inst_remove(id) for i in range(len(s:inst_requests)) - if s:inst_requests[i].id == a:id + let l:req = s:inst_requests[i] + if l:req.id == a:id if s:ghost_text_nvim let l:ns = nvim_create_namespace('vt_inst') - call nvim_buf_del_extmark(bufnr('%'), l:ns, s:inst_requests[i].extmark) - if s:inst_requests[i].extmark_virt != -1 - call nvim_buf_del_extmark(bufnr('%'), l:ns, s:inst_requests[i].extmark_virt) + call nvim_buf_del_extmark(l:req.bufnr, l:ns, l:req.extmark) + if l:req.extmark_virt != -1 + call nvim_buf_del_extmark(l:req.bufnr, l:ns, l:req.extmark_virt) endif endif call remove(s:inst_requests, i) @@ -1499,7 +1492,7 @@ function! s:inst_remove(id) endfor endfunction -function! s:inst_callback(start, end, result) +function! s:inst_callback(bufnr, start, end, result) let l:result_lines = split(a:result, "\n", 1) " Remove trailing empty lines @@ -1511,7 +1504,7 @@ function! s:inst_callback(start, end, result) let l:num_original = a:end - a:start + 1 " Delete the original range - call deletebufline(bufnr('%'), a:start, a:end) + call deletebufline(a:bufnr, a:start, a:end) " Insert the new lines - call append(a:start - 1, l:result_lines) + call appendbufline(a:bufnr, a:start - 1, l:result_lines) @@ -1525,7 +1518,7 @@ function! 
llama#inst_accept() call llama#inst_update_pos(l:req) if l:line >= l:req.range[0] && l:line <= l:req.range[1] - call s:inst_callback(l:req.range[0], l:req.range[1], l:req.result) + call s:inst_callback(l:req.bufnr, l:req.range[0], l:req.range[1], l:req.result) call s:inst_remove(l:req.id) return endif From 9df00957ea26b66f697fae8b59213c366ca95b41 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:35:24 +0200 Subject: [PATCH 6/8] cont : send warm-up request --- autoload/llama.vim | 96 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/autoload/llama.vim b/autoload/llama.vim index e799c50..3ab27e5 100644 --- a/autoload/llama.vim +++ b/autoload/llama.vim @@ -1247,8 +1247,81 @@ endfunction " Instruct-based editing " ===================================== +function! llama#inst_build(lines, inst) + let l:system_message = { + \ 'role': 'system', + \ 'content': 'You are a code-editing assistant. Return ONLY the result after applying the instruction to the selection.' + \ } + + let l:extra = s:ring_get_extra() + + let l:user_content = "" + let l:user_content .= "--- context ----------------------------------------------------\n" + let l:user_content .= join(l:extra, "\n") . "\n" + let l:user_content .= "--- selection --------------------------------------------------\n" + let l:user_content .= join(a:lines, "\n") . "\n" + let l:user_content .= "--- instruction ------------------------------------------------\n" + + if !empty(a:inst) + let l:user_content .= a:inst . "\n" + let l:user_content .= "--- result -----------------------------------------------------\n" + endif + + let l:user_message = {'role': 'user', 'content': l:user_content} + + let l:messages = [l:system_message, l:user_message] + + return l:messages +endfunction + function! 
llama#inst(start, end) let l:lines = getline(a:start, a:end) + + " while the user is providing an instruction, send a warm-up request + let l:messages = llama#inst_build(l:lines, '') + + let l:request = { + \ 'id_slot': 0, + \ 'messages': l:messages, + \ 'samplers': [], + \ 'n_predict': 0, + \ 'stream': v:false, + \ 'cache_prompt': v:true, + \ 'response_fields': [""], + \ } + + let l:curl_command = [ + \ "curl", + \ "--silent", + \ "--no-buffer", + \ "--request", "POST", + \ "--url", g:llama_config.endpoint_inst, + \ "--header", "Content-Type: application/json", + \ "--data", "@-", + \ ] + + if exists("g:llama_config.model_inst") && len(g:llama_config.model_inst) > 0 + let l:request.model = g:llama_config.model_inst + endif + + if exists("g:llama_config.api_key") && len(g:llama_config.api_key) > 0 + call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key]) + endif + + let l:request_json = json_encode(l:request) + + " no callbacks because we don't need to process the response + if s:ghost_text_nvim + let jobid = jobstart(l:curl_command, {}) + call chansend(jobid, l:request_json) + call chanclose(jobid, 'stdin') + elseif s:ghost_text_vim + let jobid = job_start(l:curl_command, {}) + let channel = job_getchannel(jobid) + call ch_sendraw(channel, l:request_json) + call ch_close_in(channel) + endif + let l:inst = input('Instruction: ') if empty(l:inst) return @@ -1295,28 +1368,9 @@ function! llama#inst_send(start, end, lines, inst, callback) " Initialize virtual text with processing status call s:inst_update(l:request_id, 'proc') - " Build the payload - let l:system_message = { - \ 'role': 'system', - \ 'content': 'You are a code-editing assistant. Return ONLY the result after applying the instruction to the selection.' - \ } - - let l:extra = s:ring_get_extra() - - let l:user_content = "" - let l:user_content .= "--- context ----------------------------------------------------\n" - let l:user_content .= join(l:extra, "\n") . 
"\n" - let l:user_content .= "--- selection --------------------------------------------------\n" - let l:user_content .= join(a:lines, "\n") . "\n" - let l:user_content .= "--- instruction ------------------------------------------------\n" - let l:user_content .= a:inst . "\n" - let l:user_content .= "--- result -----------------------------------------------------\n" - - call llama#debug_log('inst_send | ' . a:inst, l:user_content) + let l:messages = llama#inst_build(a:lines, a:inst) - let l:user_message = {'role': 'user', 'content': l:user_content} - - let l:messages = [l:system_message, l:user_message] + call llama#debug_log('inst_send | ' . a:inst, join(l:messages, "\n")) let l:request = { \ 'id_slot': 0, From 9c35719feef49f8dc921e49af039e2d22cf7b2fb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:48:10 +0200 Subject: [PATCH 7/8] readme : update --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ae24db7..e19f6c4 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,16 @@ Local LLM-assisted text completion. image ---- +#### Fill-in-Middle (FIM) completions ![llama vim-spec-1](https://github.com/user-attachments/assets/404ebc2a-e4b8-4119-999b-e5365ec3208d) +#### Instruction-based editing + +https://github.com/user-attachments/assets/641a6e72-f1a2-4fe5-b0fd-c2597c6f4cdc + +--- + ## Features - Auto-suggest on cursor movement in `Insert` mode From b48d5a5bdef4187daa798a3c2958ffb27f57b380 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 19 Jan 2026 19:49:54 +0200 Subject: [PATCH 8/8] readme : update about endpoints --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e19f6c4..7630896 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ for the full list of options. 
### llama.cpp setup -The plugin requires a [llama.cpp](https://github.com/ggml-org/llama.cpp) server instance to be running at [`g:llama_config.endpoint`](https://github.com/ggml-org/llama.vim/blob/master/autoload/llama.vim#L37). +The plugin requires a [llama.cpp](https://github.com/ggml-org/llama.cpp) server instance to be running at [`g:llama_config.endpoint_fim`](https://github.com/ggml-org/llama.vim/blob/master/autoload/llama.vim#L18) and/or [`g:llama_config.endpoint_inst`](https://github.com/ggml-org/llama.vim/blob/master/autoload/llama.vim#L19). #### Mac OS