From 2d7baaf50f3277e65cf71071f61ea34823d14c30 Mon Sep 17 00:00:00 2001
From: AustinMroz
Date: Tue, 8 Aug 2023 06:44:48 -0500
Subject: [PATCH] vim : streaming and more (#2495)

* Update Vim plugin

* Remove getbufoneline usage, add input bind example.

getbufoneline() is a recently added function and has been replaced with
getbufline() for compatibility with older Vim versions.

An additional example explains how to add a keybind that works in
insert mode.
---
 examples/llama.vim | 132 +++++++++++++++++++++++++++++++++++++++++++++
 examples/llm.vim   |  23 --------
 2 files changed, 132 insertions(+), 23 deletions(-)
 create mode 100644 examples/llama.vim
 delete mode 100644 examples/llm.vim

diff --git a/examples/llama.vim b/examples/llama.vim
new file mode 100644
index 000000000..f03fadfb7
--- /dev/null
+++ b/examples/llama.vim
@@ -0,0 +1,132 @@
+" Requires an already running llama.cpp server
+" To install, copy or symlink this file to ~/.vim/autoload/llama.vim
+" Then start generation with :call llama#doLlamaGen(),
+" or add a keybind to your vimrc such as
+" nnoremap Z :call llama#doLlamaGen()<CR>
+" Similarly, you could add an insert mode keybind with
+" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
+"
+" g:llama_api_url and g:llama_overrides can be configured in your .vimrc
+" let g:llama_api_url = "192.168.1.10:8080"
+" llama_overrides can also be set through buffer/window scopes. For instance,
+" autocmd FileType python let b:llama_overrides = {"temp": 0.2}
+" could be added to your .vimrc to automatically set a lower temperature when
+" editing a Python script
+" Additionally, an override dict can be stored at the top of a file
+" !*{"stop": ["User:"]}
+" could be added to the start of your chatlog.txt to set the stopping token
+" These parameter dicts are merged together from lowest to highest priority:
+" server default -> g:llama_overrides -> w:llama_overrides ->
+" b:llama_overrides -> in-file (!*) overrides
+"
+" Sublists (like logit_bias and stop) are overridden, not merged
+" Example override:
+" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
+if !exists("g:llama_api_url")
+  let g:llama_api_url = "127.0.0.1:8080"
+endif
+if !exists("g:llama_overrides")
+  let g:llama_overrides = {}
+endif
+const s:querydata = {"n_predict": 256, "stop": ["\n"], "stream": v:true}
+const s:curlcommand = ['curl', '--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
+let s:linedict = {}
+
+func s:callbackHandler(bufn, channel, msg)
+  if len(a:msg) < 3
+    return
+  elseif a:msg[0] == "d"
+    let l:msg = a:msg[6:-1]
+  else
+    let l:msg = a:msg
+  endif
+  let l:decoded_msg = json_decode(l:msg)
+  let l:newtext = split(l:decoded_msg['content'], "\n", 1)
+  if len(l:newtext) > 0
+    call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. l:newtext[0])
+  else
+    echo "nothing generated"
+  endif
+  if len(l:newtext) > 1
+    let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], l:newtext[1:-1])
+    let s:linedict[a:bufn] = s:linedict[a:bufn] + len(l:newtext) - 1
+  endif
+  if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
+    echo "Finished generation"
+  endif
+endfunction
+
+func llama#doLlamaGen()
+  if exists("b:job")
+    if job_status(b:job) == "run"
+      call job_stop(b:job)
+      return
+    endif
+  endif
+
+  let l:cbuffer = bufnr("%")
+  let s:linedict[l:cbuffer] = line('$')
+  let l:buflines = getbufline(l:cbuffer, 1, 1000)
+  let l:querydata = copy(s:querydata)
+  call extend(l:querydata, g:llama_overrides)
+  if exists("w:llama_overrides")
+    call extend(l:querydata, w:llama_overrides)
+  endif
+  if exists("b:llama_overrides")
+    call extend(l:querydata, b:llama_overrides)
+  endif
+  if l:buflines[0][0:1] == '!*'
+    let l:userdata = json_decode(l:buflines[0][2:-1])
+    call extend(l:querydata, l:userdata)
+    let l:buflines = l:buflines[1:-1]
+  endif
+  let l:querydata.prompt = join(l:buflines, "\n")
+  let l:curlcommand = copy(s:curlcommand)
+  let l:curlcommand[2] = json_encode(l:querydata)
+  let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
+endfunction
+
+" Echoes the tokenization of the provided string, or from the cursor to the end of the word
+" Onus is placed on the user to include the preceding space
+func llama#tokenizeWord(...)
+  if (a:0 > 0)
+    let l:input = a:1
+  else
+    exe "normal \"*ye"
+    let l:input = @*
+  endif
+  let l:querydata = {"content": l:input}
+  let l:curlcommand = copy(s:curlcommand)
+  let l:curlcommand[2] = json_encode(l:querydata)
+  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
+  let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
+endfunction
+
+func s:tokenizeWordCallback(plaintext, channel, msg)
+  echo '"' .. a:plaintext .. '" - ' .. string(json_decode(a:msg).tokens)
+endfunction
+
+
+" Echoes the token count of the entire buffer (or of the provided string)
+" Example usage: :echo llama#tokenCount()
+func llama#tokenCount(...)
+  if (a:0 > 0)
+    let l:buflines = a:1
+  else
+    let l:buflines = getline(1, 1000)
+    if l:buflines[0][0:1] == '!*'
+      let l:buflines = l:buflines[1:-1]
+    endif
+    let l:buflines = join(l:buflines, "\n")
+  endif
+  let l:querydata = {"content": l:buflines}
+  let l:curlcommand = copy(s:curlcommand)
+  let l:curlcommand[2] = json_encode(l:querydata)
+  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
+  let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
+endfunction
+
+func s:tokenCountCallback(channel, msg)
+  let resp = json_decode(a:msg)
+  echo len(resp.tokens)
+endfunction
diff --git a/examples/llm.vim b/examples/llm.vim
deleted file mode 100644
index efecad0cd..000000000
--- a/examples/llm.vim
+++ /dev/null
@@ -1,23 +0,0 @@
-function! Llm()
-
-    let url = "http://127.0.0.1:8080/completion"
-
-    " Get the content of the current buffer
-    let buffer_content = join(getline(1, '$'), "\n")
-
-    " Create the JSON payload
-    let json_payload = {"temp":0.72,"top_k":100,"top_p":0.73,"repeat_penalty":1.100000023841858,"n_predict":10,"stream": v:false}
-    let json_payload.prompt = buffer_content
-
-    " Define the curl command
-    let curl_command = 'curl -k -s -X POST -H "Content-Type: application/json" -d @- ' . url
-    let response = system(curl_command, json_encode(json_payload))
-
-    " Extract the content field from the response
-    let content = json_decode(response).content
-
-    " Insert the content at the cursor position
-    call setline(line('.'), getline('.') . content)
-endfunction
-
-command! Llm call Llm()
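
Illustration (not part of the patch): the override merging described in the
llama.vim header can be sketched in a few lines of Vim script. The dict values
below are hypothetical; the merge order mirrors the extend() chain in
llama#doLlamaGen(), where later layers win and sublists are replaced wholesale:

    " lowest to highest priority: server default -> g: -> w: -> b: -> in-file !*
    let s:defaults = {"n_predict": 256, "temperature": 0.8, "stop": ["\n"]}
    let g:llama_overrides = {"temperature": 0.5}
    let b:llama_overrides = {"temperature": 0.2, "stop": ["User:"]}
    let s:merged = copy(s:defaults)
    call extend(s:merged, g:llama_overrides)
    call extend(s:merged, b:llama_overrides)
    echo s:merged
    " -> {'n_predict': 256, 'temperature': 0.2, 'stop': ['User:']}
    " 'stop' was overridden, not merged, exactly as the header warns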