From 90477d164daf7fee494202005094a8f259b7b3ac Mon Sep 17 00:00:00 2001
From: Alessandro Mauri
Date: Sun, 24 Dec 2023 23:23:35 +0100
Subject: [PATCH] various fixes, better config, better fetch handling

---
 package.json        | 45 ++++++++++++-----------
 src/common.ts       |  5 ++-
 src/extension.ts    | 21 +---------
 src/llamacpp-api.ts | 28 +++++++-------
 src/openai-api.ts   | 90 +++++++++++++++++++++++----------------------
 5 files changed, 88 insertions(+), 101 deletions(-)

diff --git a/package.json b/package.json
index 0fa9e4a..e049cfd 100644
--- a/package.json
+++ b/package.json
@@ -141,60 +141,60 @@
         "default": false,
         "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
       },
-      "dumbpilot.llamaHost": {
+      "dumbpilot.endpoint": {
         "type": "string",
         "default": "http://0.0.0.0:8080",
         "description": "llama.cpp server address"
       },
-      "dumbpilot.llamaCtxsize": {
+      "dumbpilot.parameters.ContextSize": {
         "type": "number",
         "default": 2048
       },
-      "dumbpilot.llamaMaxtokens": {
+      "dumbpilot.parameters.MaxTokens": {
         "type": "number",
         "default": -1
       },
-      "dumbpilot.llamaMirostat": {
+      "dumbpilot.parameters.Mirostat": {
         "type": "number",
         "default": 0
       },
-      "dumbpilot.llamaRepeatPenalty": {
+      "dumbpilot.parameters.RepeatPenalty": {
         "type": "number",
         "default": 1.11
       },
-      "dumbpilot.llamaFrequencyPenalty": {
+      "dumbpilot.parameters.FrequencyPenalty": {
         "type": "number",
         "default": 0
       },
-      "dumbpilot.llamaPresencePenalty": {
+      "dumbpilot.parameters.PresencePenalty": {
         "type": "number",
         "default": 0
       },
-      "dumbpilot.llamaRepeatCtx": {
+      "dumbpilot.parameters.RepeatCtx": {
         "type": "number",
         "default": 256
       },
-      "dumbpilot.llamaTemperature": {
+      "dumbpilot.parameters.Temperature": {
         "type": "number",
         "default": 0.25
      },
-      "dumbpilot.llamaTop_p": {
+      "dumbpilot.parameters.Top_p": {
         "type": "number",
         "default": 0.95
       },
-      "dumbpilot.llamaTop_k": {
+      "dumbpilot.parameters.Top_k": {
         "type": "number",
         "default": 40
       },
-      "dumbpilot.llamaTypical_p": {
+      "dumbpilot.parameters.Typical_p": {
         "type": "number",
         "default": 0.95
       },
-      "dumbpilot.llamaTailfree_z": {
+      "dumbpilot.parameters.Tailfree_z": {
         "type": "number",
         "default": 0.5
       },
-      "dumbpilot.llamaSeed": {
+      "dumbpilot.parameters.Seed": {
         "type": "number",
         "default": -1
       },
@@ -215,31 +215,32 @@
         "default": false,
         "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
       },
-      "dumbpilot.llamaCachePrompt": {
+      "dumbpilot.CachePrompt": {
         "type": "boolean",
         "default": true,
         "description": "Enable prompt caching for faster results"
       },
-      "dumbpilot.llamaInstructModel": {
+      "dumbpilot.model.InstructModel": {
         "type": "boolean",
         "default": false,
         "description": "For use with instruct models"
       },
-      "dumbpilot.llamaSystemPrompt": {
+      "dumbpilot.model.SystemPrompt": {
         "type": "string",
         "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
       },
-      "dumbpilot.llamaUseOpenAIAPI": {
-        "type": "boolean",
-        "default": true,
-        "description": "Use the OpenAI API to make requests to the server instead of the llama.cpp server API"
+      "dumbpilot.API": {
+        "type": "string",
+        "enum": ["llamacpp", "OpenAI"],
+        "default": "OpenAI",
+        "description": "Which server API to use: the OpenAI-compatible API or the native llama.cpp server API"
       },
-      "dumbpilot.llamaModelName": {
+      "dumbpilot.model.ModelName": {
         "type": "string",
         "default": "deepseek-coder-6.7B-base.gguf",
         "description": "Name of the model to use, only works in OpenAI API mode"
       },
-      "dumbpilot.llamaAPIStream": {
+      "dumbpilot.parameters.stream": {
         "type": "boolean",
         "default": false
       }
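With this rename, all sampling settings live under the dotted dumbpilot.parameters namespace instead of flat llama-prefixed keys. As a minimal sketch of how the renamed keys are read from extension code (the readParams helper is illustrative, not part of this patch; dotted child keys resolve against the section passed to getConfiguration):

	import * as vscode from 'vscode';

	// Hypothetical helper: read a few of the renamed settings with their defaults.
	function readParams() {
		const config = vscode.workspace.getConfiguration('dumbpilot');
		return {
			endpoint: config.get<string>('endpoint', 'http://0.0.0.0:8080'),
			temperature: config.get<number>('parameters.Temperature', 0.25),
			maxTokens: config.get<number>('parameters.MaxTokens', -1),
		};
	}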
diff --git a/src/common.ts b/src/common.ts
index c531d15..edc75c7 100644
--- a/src/common.ts
+++ b/src/common.ts
@@ -60,7 +60,7 @@ export async function showPendingStatusBar(
 
 let st_msg: vscode.StatusBarItem | undefined;
 
-export function updateStatusBarMessage(text: string) {
+export function updateStatusBarMessage(total: number, text: string) {
 	if (!st_msg) {
 		st_msg = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Left, -100);
 	}
@@ -68,9 +68,10 @@ export function updateStatusBarMessage(text: string) {
 	const run_color = new vscode.ThemeColor('statusBarItem.warningBackground');
 	if (text.length > 0) {
 		st_msg.backgroundColor = run_color;
-		st_msg.text = '$(megaphone) ' + text.trim();
+		st_msg.text = total + ' $(megaphone) ' + text.trim();
 		st_msg.show();
 	} else {
+		st_msg.text = '';
 		st_msg.hide();
 	}
 }
diff --git a/src/extension.ts b/src/extension.ts
index cd816f8..80f5cbf 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -14,24 +14,11 @@ import {
 	openAIMakeRequest,
 } from './openai-api';
 import {
-	FetchErrorCause,
 	ResponseData,
 	showMessageWithTimeout,
 	showPendingStatusBar,
-	updateStatusBarMessage,
 } from './common';
 
-// clean up the document
-function clean_text(txt: string): string {
-	// these are already done by JSON.stringify()
-	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
-	//txt = txt.replace((/\t/gm, "\\t"));
-
-	// FIXME: I don't know if this penalizes some results since most people indent with spaces
-	//txt = txt.replace(/\s+/gm, " ");
-	return txt;
-}
-
 export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
 
@@ -54,8 +41,6 @@ export function activate(context: vscode.ExtensionContext) {
 		config.update('completionEnabled', false);
 	});
 
-	updateStatusBarMessage('');
-
 	// Register a new provider of inline completions, this does not decide how it is invoked
 	// only what the completion should be
 	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
@@ -90,10 +75,6 @@ export function activate(context: vscode.ExtensionContext) {
 		let doc_before = doc_text.substring(0, doc_off);
 		let doc_after = doc_text.substring(doc_off);
 
-		// make it cleaner in hope to reduce the number of tokens
-		doc_before = clean_text(doc_before);
-		doc_after = clean_text(doc_after);
-
 		// TODO: prune text up to a maximum context length
 
 		// Prefix the filename in a comment
@@ -109,7 +90,7 @@ export function activate(context: vscode.ExtensionContext) {
 		// actually make the request
 		let data: ResponseData = { content: '', tokens: 0, time: 0 };
 		let promise: Promise<ResponseData>;
-		if (config.get('llamaUseOpenAIAPI') === true) {
+		if (config.get('API') === 'OpenAI') {
 			const request: OpenAICompletionRequest = createOpenAIAPIRequest(
 				config,
 				doc_before,
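updateStatusBarMessage now takes a running chunk count as its first argument and prefixes it to the message, so the user can see streaming progress. A quick usage sketch (the values are illustrative):

	// shows: 3 $(megaphone) completion text
	updateStatusBarMessage(3, 'completion text');
	// clears the text and hides the status bar item
	updateStatusBarMessage(0, '');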
diff --git a/src/llamacpp-api.ts b/src/llamacpp-api.ts
index 28756c4..56952e1 100644
--- a/src/llamacpp-api.ts
+++ b/src/llamacpp-api.ts
@@ -59,20 +59,20 @@ export function createLlamacppRequest(
 	doc_after: string
 ): LlamaRequest {
 	let request: LlamaRequest = {
-		n_predict: config.get('llamaMaxtokens') as number,
-		mirostat: config.get('llamaMirostat') as number,
-		repeat_penalty: config.get('llamaRepeatPenalty') as number,
-		frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-		presence_penalty: config.get('llamaPresencePenalty,') as number,
-		repeat_last_n: config.get('llamaRepeatCtx,') as number,
-		temperature: config.get('llamaTemperature') as number,
-		top_p: config.get('llamaTop_p') as number,
-		top_k: config.get('llamaTop_k') as number,
-		typical_p: config.get('llamaTypical_p') as number,
-		tfs_z: config.get('llamaTailfree_z,') as number,
-		seed: config.get('llamaSeed') as number,
+		n_predict: config.get('parameters.MaxTokens') as number,
+		mirostat: config.get('parameters.Mirostat') as number,
+		repeat_penalty: config.get('parameters.RepeatPenalty') as number,
+		frequency_penalty: config.get('parameters.FrequencyPenalty') as number,
+		presence_penalty: config.get('parameters.PresencePenalty') as number,
+		repeat_last_n: config.get('parameters.RepeatCtx') as number,
+		temperature: config.get('parameters.Temperature') as number,
+		top_p: config.get('parameters.Top_p') as number,
+		top_k: config.get('parameters.Top_k') as number,
+		typical_p: config.get('parameters.Typical_p') as number,
+		tfs_z: config.get('parameters.Tailfree_z') as number,
+		seed: config.get('parameters.Seed') as number,
 		stream: false,
-		cache_prompt: config.get('llamaCachePrompt') as boolean,
+		cache_prompt: config.get('CachePrompt') as boolean,
 	};
 
 	const fim = config.get('fimEnabled') as boolean;
@@ -98,7 +98,7 @@ export function createLlamacppRequest(
 export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
 	const fim = config.get('fimEnabled') as boolean;
 	const fimRequest = config.get('useFillInMiddleRequest') as boolean;
-	let req_str: string = config.get('llamaHost') as string;
+	let req_str: string = config.get('endpoint') as string;
 
 	if (fim === true && fimRequest === true) {
 		req_str += '/infill';
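The hunk above only shows the /infill branch of llamacppRequestEndpoint. As a condensed sketch of the routing it implies, assuming the llama.cpp server's plain /completion route for the non-FIM case (that branch is not visible in this hunk):

	// Hypothetical condensed version of the endpoint selection.
	function endpointFor(base: string, fim: boolean, fimRequest: boolean): string {
		// native fill-in-middle request type -> /infill, plain completion otherwise
		return fim && fimRequest ? base + '/infill' : base + '/completion';
	}

	endpointFor('http://0.0.0.0:8080', true, true); // 'http://0.0.0.0:8080/infill'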
diff --git a/src/openai-api.ts b/src/openai-api.ts
index 16178c0..4acddd5 100644
--- a/src/openai-api.ts
+++ b/src/openai-api.ts
@@ -3,10 +3,8 @@ import {
 	FetchErrorCause,
 	ResponseData,
 	showMessageWithTimeout,
-	showPendingStatusBar,
 	updateStatusBarMessage,
 } from './common';
-import { config } from 'process';
 
 // oogabooga/text-generation-webui OpenAI compatible API
 // https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
@@ -98,19 +96,19 @@ export function createOpenAIAPIRequest(
 ): OpenAICompletionRequest {
 	let request: OpenAICompletionRequest = {
 		prompt: '',
-		max_tokens: config.get('llamaMaxtokens') as number,
-		mirostat_mode: config.get('llamaMirostat') as number,
-		repetition_penalty: config.get('llamaRepeatPenalty') as number,
-		frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-		presence_penalty: config.get('llamaPresencePenalty,') as number,
-		repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
-		temperature: config.get('llamaTemperature') as number,
-		top_p: config.get('llamaTop_p') as number,
-		top_k: config.get('llamaTop_k') as number,
-		typical_p: config.get('llamaTypical_p') as number,
-		tfs: config.get('llamaTailfree_z,') as number,
-		seed: config.get('llamaSeed') as number,
-		stream: config.get('llamaAPIStream'),
+		max_tokens: config.get('parameters.MaxTokens') as number,
+		mirostat_mode: config.get('parameters.Mirostat') as number,
+		repetition_penalty: config.get('parameters.RepeatPenalty') as number,
+		frequency_penalty: config.get('parameters.FrequencyPenalty') as number,
+		presence_penalty: config.get('parameters.PresencePenalty') as number,
+		repetition_penalty_range: config.get('parameters.RepeatCtx') as number,
+		temperature: config.get('parameters.Temperature') as number,
+		top_p: config.get('parameters.Top_p') as number,
+		top_k: config.get('parameters.Top_k') as number,
+		typical_p: config.get('parameters.Typical_p') as number,
+		tfs: config.get('parameters.Tailfree_z') as number,
+		seed: config.get('parameters.Seed') as number,
+		stream: config.get('parameters.stream') as boolean,
 	};
 
 	const fim = config.get('fimEnabled') as boolean;
@@ -127,11 +125,13 @@ export function createOpenAIAPIRequest(
 	return request;
 }
 
-// for now only completions is implemented
+// for now only /v1/completions is implemented
+// TODO: implement chat
 export function openAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
-	return (config.get('llamaHost') as string) + '/v1/completions';
+	return (config.get('endpoint') as string) + '/v1/completions';
 }
 
+// make a request and parse the incoming data
 export async function openAIMakeRequest(
 	request_body: OpenAICompletionRequest,
 	endpoint: string
@@ -169,41 +169,45 @@ export async function openAIMakeRequest(
 		// start a timer
 		const timer_start = performance.now();
 
+		let chunk_number: number = 1;
 		for await (const chunk of response.body) {
-			// FIXME: why the fuck do I have to do this shite
-			let data_text = new TextDecoder().decode(chunk);
-			data_text = data_text.substring(data_text.indexOf('{'));
-			let data: OpenAICompletionResponse;
-
-			try {
-				data = JSON.parse(data_text);
-			} catch (e: any) {
-				console.error(e);
-				return ret;
-			}
-			//console.log(JSON.stringify(data));
+			// each chunk of data is a complete response in the form of a uint8 array
+			const data_text = Buffer.from(chunk as Uint8Array).toString();
 
-			if (Object.hasOwn(data, 'detail') === true) {
-				data = data as OpenAICompletionFailureResponse;
-				// TODO: why did it error?
-				throw new Error('OpenAI Endpoint Error');
+			// each response chunk contains one or more data chunks, which in turn are just json data
+			const data_chunks = data_text.split('data: ');
+			let data: OpenAICompletionResponse;
+			for (const data_string of data_chunks) {
+				const json_string = data_string.trim();
+				if (json_string.length < 2) {
+					continue;
+				}
+
+				data = JSON.parse(json_string);
+				//console.log(JSON.stringify(data));
+
+				if (Object.hasOwn(data, 'detail') === true) {
+					data = data as OpenAICompletionFailureResponse;
+					// TODO: why did it error?
+					throw new Error('OpenAI Endpoint Error');
+				}
+				// unpack the data
+				data = data as OpenAICompletionSuccessResponse;
+
+				for (const choice of data.choices) {
+					ret.content += choice.text;
+					updateStatusBarMessage(chunk_number, choice.text);
+					chunk_number++;
+				}
+				ret.tokens += data.usage?.completion_tokens || 0;
 			}
-
-			// unpack the data
-			data = data as OpenAICompletionSuccessResponse;
-			// FIXME: why the choices may be multiple?
-			// TODO: display the multiple choices
-			//console.log(data.choices[0].text);
-			updateStatusBarMessage(data.choices[0].text);
-			ret.content += data.choices[0].text;
-			ret.tokens += data.usage?.completion_tokens || 0;
 		}
 
 		// stop the timer
 		const timer_end = performance.now();
 		ret.time = (timer_end - timer_start) / 1000.0;
 
 		// clear the status bar item
-		updateStatusBarMessage('');
+		updateStatusBarMessage(0, '');
 	} catch (e: any) {
 		console.error(e);
 		const err = e as TypeError;
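The new loop assumes each network chunk carries one or more SSE-style 'data: ' frames. For reference, a self-contained sketch of that framing and how the split-and-parse recovers the streamed text (the payload here is made up):

	// Two streamed completion frames, as the server would send them.
	const frames = 'data: {"choices":[{"text":"hello"}]}\n\ndata: {"choices":[{"text":" world"}]}\n\n';

	let text = '';
	for (const part of frames.split('data: ')) {
		const json = part.trim();
		if (json.length < 2) {
			continue; // skips the empty string before the first frame
		}
		text += JSON.parse(json).choices.map((c: { text: string }) => c.text).join('');
	}
	console.log(text); // -> "hello world"

Note that splitting on the literal string 'data: ' is fragile if a completion ever contains that substring; a line-based SSE parser would be more robust.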