various fixes, better config, better fetch handling

master
Alessandro Mauri 5 months ago
parent 12f3c82d3e
commit 90477d164d
Signed by: alema
GPG Key ID: 2B7BF9531FF03BE8
  1. 45  package.json
  2. 5   src/common.ts
  3. 21  src/extension.ts
  4. 28  src/llamacpp-api.ts
  5. 90  src/openai-api.ts

@@ -141,60 +141,60 @@
 "default": false,
 "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
 },
-"dumbpilot.llamaHost": {
+"dumbpilot.endpoint": {
 "type": "string",
 "default": "http://0.0.0.0:8080",
 "description": "llama.cpp server address"
 },
-"dumbpilot.llamaCtxsize": {
+"dumbpilot.parameters.ContextSize": {
 "type": "number",
 "default": 2048
 },
-"dumbpilot.llamaMaxtokens": {
+"dumbpilot.parameters.MaxTokens": {
 "type": "number",
 "default": -1
 },
-"dumbpilot.llamaMirostat": {
+"dumbpilot.parameters.Mirostat": {
 "type": "number",
 "default": 0
 },
-"dumbpilot.llamaRepeatPenalty": {
+"dumbpilot.parameters.RepeatPenalty": {
 "type": "number",
 "default": 1.11
 },
-"dumbpilot.llamaFrequencyPenalty": {
+"dumbpilot.parameters.FrequencyPenalty": {
 "type": "number",
 "default": 0
 },
-"dumbpilot.llamaPresencePenalty": {
+"dumbpilot.parameters.PresencePenalty": {
 "type": "number",
 "default": 0
 },
-"dumbpilot.llamaRepeatCtx": {
+"dumbpilot.parameters.RepeatCtx": {
 "type": "number",
 "default": 256
 },
-"dumbpilot.llamaTemperature": {
+"dumbpilot.parameters.Temperature": {
 "type": "number",
 "default": 0.25
 },
-"dumbpilot.llamaTop_p": {
+"dumbpilot.parameters.Top_p": {
 "type": "number",
 "default": 0.95
 },
-"dumbpilot.llamaTop_k": {
+"dumbpilot.parameters.Top_k": {
 "type": "number",
 "default": 40
 },
-"dumbpilot.llamaTypical_p": {
+"dumbpilot.parameters.Typical_p": {
 "type": "number",
 "default": 0.95
 },
-"dumbpilot.llamaTailfree_z": {
+"dumbpilot.parameters.Tailfree_z": {
 "type": "number",
 "default": 0.5
 },
-"dumbpilot.llamaSeed": {
+"dumbpilot.parameters.Seed": {
 "type": "number",
 "default": -1
 },
@@ -215,31 +215,32 @@
 "default": false,
 "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
 },
-"dumbpilot.llamaCachePrompt": {
+"dumbpilot.CachePrompt": {
 "type": "boolean",
 "default": true,
 "description": "Enable prompt caching for faster results"
 },
-"dumbpilot.llamaInstructModel": {
+"dumbpilot.model.InstructModel": {
 "type": "boolean",
 "default": false,
 "description": "For use with instruct models"
 },
-"dumbpilot.llamaSystemPrompt": {
+"dumbpilot.model.SystemPrompt": {
 "type": "string",
 "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
 },
-"dumbpilot.llamaUseOpenAIAPI": {
-"type": "boolean",
-"default": true,
+"dumbpilot.API": {
+"type": "string",
+"enum": ["llamacpp", "OpenAI"],
+"default": "OpenAI",
 "description": "Use the OpenAI API to make requests to the server instead of the llama.cpp server API"
 },
-"dumbpilot.llamaModelName": {
+"dumbpilot.model.ModelName": {
 "type": "string",
 "default": "deepseek-coder-6.7B-base.gguf",
 "description": "Name of the model to use, only works in OpenAI API mode"
 },
-"dumbpilot.llamaAPIStream": {
+"dumbpilot.parameters.stream": {
 "type": "boolean",
 "default": false
 }

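The package.json changes group the sampler settings under a parameters.* prefix, move model-related options under model.*, rename llamaHost to endpoint, and replace the llamaUseOpenAIAPI boolean with an API string enum ("llamacpp" or "OpenAI"). A minimal sketch of reading the renamed keys through the standard VS Code configuration API (the helper name readDumbpilotConfig is illustrative, not part of this commit):

import * as vscode from 'vscode';

// Illustrative only: read a few of the renamed keys by their new dotted names.
function readDumbpilotConfig() {
	const config = vscode.workspace.getConfiguration('dumbpilot');
	return {
		endpoint: config.get<string>('endpoint', 'http://0.0.0.0:8080'),
		api: config.get<string>('API', 'OpenAI'), // 'llamacpp' | 'OpenAI'
		maxTokens: config.get<number>('parameters.MaxTokens', -1),
		temperature: config.get<number>('parameters.Temperature', 0.25),
		stream: config.get<boolean>('parameters.stream', false),
	};
}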
@@ -60,7 +60,7 @@ export async function showPendingStatusBar(
 let st_msg: vscode.StatusBarItem | undefined;
-export function updateStatusBarMessage(text: string) {
+export function updateStatusBarMessage(total: number, text: string) {
 if (!st_msg) {
 st_msg = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Left, -100);
 }
@@ -68,9 +68,10 @@ export function updateStatusBarMessage(text: string) {
 const run_color = new vscode.ThemeColor('statusBarItem.warningBackground');
 if (text.length > 0) {
 st_msg.backgroundColor = run_color;
-st_msg.text = '$(megaphone) ' + text.trim();
+st_msg.text = total + ' $(megaphone) ' + text.trim();
 st_msg.show();
 } else {
+st_msg.text = '';
 st_msg.hide();
 }
 }

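updateStatusBarMessage now takes the chunk count as its first argument, so the status bar can show streaming progress next to the text. A short usage sketch against the signature above (the loop body is illustrative):

// Illustrative: report each streamed piece together with a running counter.
let chunk_number = 1;
for (const piece of ['some', 'streamed', 'text']) {
	updateStatusBarMessage(chunk_number, piece); // shows e.g. "3 $(megaphone) text"
	chunk_number++;
}
updateStatusBarMessage(0, ''); // empty text clears and hides the item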
@@ -14,24 +14,11 @@ import {
 openAIMakeRequest,
 } from './openai-api';
 import {
-FetchErrorCause,
 ResponseData,
 showMessageWithTimeout,
 showPendingStatusBar,
-updateStatusBarMessage,
 } from './common';
-// clean up the document
-function clean_text(txt: string): string {
-// these are already done by JSON.stringify()
-//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
-//txt = txt.replace((/\t/gm, "\\t"));
-// FIXME: I don't know if this penalizes some results since most people indent with spaces
-//txt = txt.replace(/\s+/gm, " ");
-return txt;
-}
 export function activate(context: vscode.ExtensionContext) {
 console.log('dumbpilot is now active');
@@ -54,8 +41,6 @@ export function activate(context: vscode.ExtensionContext) {
 config.update('completionEnabled', false);
 });
-updateStatusBarMessage('');
 // Register a new provider of inline completions, this does not decide how it is invoked
 // only what the completion should be
 // https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
@@ -90,10 +75,6 @@ export function activate(context: vscode.ExtensionContext) {
 let doc_before = doc_text.substring(0, doc_off);
 let doc_after = doc_text.substring(doc_off);
-// make it cleaner in hope to reduce the number of tokens
-doc_before = clean_text(doc_before);
-doc_after = clean_text(doc_after);
 // TODO: prune text up to a maximum context length
 // Prefix the filename in a comment
@@ -109,7 +90,7 @@ export function activate(context: vscode.ExtensionContext) {
 // actially make the request
 let data: ResponseData = { content: '', tokens: 0, time: 0 };
 let promise: Promise<ResponseData>;
-if (config.get('llamaUseOpenAIAPI') === true) {
+if (config.get('API') === 'OpenAI') {
 const request: OpenAICompletionRequest = createOpenAIAPIRequest(
 config,
 doc_before,

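With clean_text gone and the API selector now a string enum, the completion provider simply branches on config.get('API'). A condensed sketch of that dispatch; the llama.cpp request function name (llamacppMakeRequest) is assumed here and may differ in the actual source:

// Sketch of the backend dispatch, based on the names visible in this diff.
let promise: Promise<ResponseData>;
if (config.get('API') === 'OpenAI') {
	const request = createOpenAIAPIRequest(config, doc_before, doc_after);
	promise = openAIMakeRequest(request, openAIAPIRequestEndpoint(config));
} else {
	const request = createLlamacppRequest(config, doc_before, doc_after);
	promise = llamacppMakeRequest(request, llamacppRequestEndpoint(config)); // assumed helper name
}
const data: ResponseData = await promise;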
@@ -59,20 +59,20 @@ export function createLlamacppRequest(
 doc_after: string
 ): LlamaRequest {
 let request: LlamaRequest = {
-n_predict: config.get('llamaMaxtokens') as number,
-mirostat: config.get('llamaMirostat') as number,
-repeat_penalty: config.get('llamaRepeatPenalty') as number,
-frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-presence_penalty: config.get('llamaPresencePenalty,') as number,
-repeat_last_n: config.get('llamaRepeatCtx,') as number,
-temperature: config.get('llamaTemperature') as number,
-top_p: config.get('llamaTop_p') as number,
-top_k: config.get('llamaTop_k') as number,
-typical_p: config.get('llamaTypical_p') as number,
-tfs_z: config.get('llamaTailfree_z,') as number,
-seed: config.get('llamaSeed') as number,
+n_predict: config.get('parameters.MaxTokens') as number,
+mirostat: config.get('parameters.Mirostat') as number,
+repeat_penalty: config.get('parameters.RepeatPenalty') as number,
+frequency_penalty: config.get('parameters.FrequencyPenalty,') as number,
+presence_penalty: config.get('parameters.PresencePenalty,') as number,
+repeat_last_n: config.get('parameters.RepeatCtx,') as number,
+temperature: config.get('parameters.Temperature') as number,
+top_p: config.get('parameters.Top_p') as number,
+top_k: config.get('parameters.Top_k') as number,
+typical_p: config.get('parameters.Typical_p') as number,
+tfs_z: config.get('parameters.Tailfree_z,') as number,
+seed: config.get('parameters.Seed') as number,
 stream: false,
-cache_prompt: config.get('llamaCachePrompt') as boolean,
+cache_prompt: config.get('CachePrompt') as boolean,
 };
 const fim = config.get('fimEnabled') as boolean;
@@ -98,7 +98,7 @@ export function createLlamacppRequest(
 export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
 const fim = config.get('fimEnabled') as boolean;
 const fimRequest = config.get('useFillInMiddleRequest') as boolean;
-let req_str: string = config.get('llamaHost') as string;
+let req_str: string = config.get('endpoint') as string;
 if (fim === true && fimRequest === true) {
 req_str += '/infill';

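llamacppRequestEndpoint now builds its URL from the generic endpoint setting and appends /infill when server-side fill-in-middle is requested. A hedged sketch of how the body produced by createLlamacppRequest might be sent with fetch; the wiring shown here is assumed, not taken from the commit:

// Assumed wiring: POST the JSON body returned by createLlamacppRequest.
const request = createLlamacppRequest(config, doc_before, doc_after);
const response = await fetch(llamacppRequestEndpoint(config), {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify(request),
});
const body = await response.json(); // the llama.cpp server answers with a JSON completion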
@@ -3,10 +3,8 @@ import {
 FetchErrorCause,
 ResponseData,
 showMessageWithTimeout,
-showPendingStatusBar,
 updateStatusBarMessage,
 } from './common';
-import { config } from 'process';
 // oogabooga/text-generation-webui OpenAI compatible API
 // https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
@@ -98,19 +96,19 @@ export function createOpenAIAPIRequest(
 ): OpenAICompletionRequest {
 let request: OpenAICompletionRequest = {
 prompt: '',
-max_tokens: config.get('llamaMaxtokens') as number,
-mirostat_mode: config.get('llamaMirostat') as number,
-repetition_penalty: config.get('llamaRepeatPenalty') as number,
-frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-presence_penalty: config.get('llamaPresencePenalty,') as number,
-repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
-temperature: config.get('llamaTemperature') as number,
-top_p: config.get('llamaTop_p') as number,
-top_k: config.get('llamaTop_k') as number,
-typical_p: config.get('llamaTypical_p') as number,
-tfs: config.get('llamaTailfree_z,') as number,
-seed: config.get('llamaSeed') as number,
-stream: config.get('llamaAPIStream'),
+max_tokens: config.get('parameters.MaxTokens') as number,
+mirostat_mode: config.get('parameters.Mirostat') as number,
+repetition_penalty: config.get('parameters.RepeatPenalty') as number,
+frequency_penalty: config.get('parameters.FrequencyPenalty,') as number,
+presence_penalty: config.get('parameters.PresencePenalty,') as number,
+repetition_penalty_range: config.get('parameters.RepeatCtx,') as number,
+temperature: config.get('parameters.Temperature') as number,
+top_p: config.get('parameters.Top_p') as number,
+top_k: config.get('parameters.Top_k') as number,
+typical_p: config.get('parameters.Typical_p') as number,
+tfs: config.get('parameters.Tailfree_z,') as number,
+seed: config.get('parameters.Seed') as number,
+stream: config.get('parameters.stream') as boolean,
 };
 const fim = config.get('fimEnabled') as boolean;
@@ -127,11 +125,13 @@ export function createOpenAIAPIRequest(
 return request;
 }
-// for now only completions is implemented
+// for now only vv1/completions is implemented
+// TODO: implement chat
 export function openAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
-return (config.get('llamaHost') as string) + '/v1/completions';
+return (config.get('endpoint') as string) + '/v1/completions';
 }
+// make a request and parse the incoming data
 export async function openAIMakeRequest(
 request_body: OpenAICompletionRequest,
 endpoint: string
@@ -169,41 +169,45 @@ export async function openAIMakeRequest(
 // start a timer
 const timer_start = performance.now();
+let chunk_number: number = 1;
 for await (const chunk of response.body) {
-// FIXME: why the fuck do I have to do this shite
-let data_text = new TextDecoder().decode(chunk);
-data_text = data_text.substring(data_text.indexOf('{'));
-let data: OpenAICompletionResponse;
-try {
-data = JSON.parse(data_text);
-} catch (e: any) {
-console.error(e);
-return ret;
-}
-//console.log(JSON.stringify(data));
-if (Object.hasOwn(data, 'detail') === true) {
-data = data as OpenAICompletionFailureResponse;
-// TODO: why did it error?
-throw new Error('OpenAI Endpoint Error');
-}
-// unpack the data
-data = data as OpenAICompletionSuccessResponse;
-// FIXME: why the choices may be multiple?
-// TODO: display the multiple choices
-//console.log(data.choices[0].text);
-updateStatusBarMessage(data.choices[0].text);
-ret.content += data.choices[0].text;
-ret.tokens += data.usage?.completion_tokens || 0;
+// each chunk of data is a complete response in the form of a uint8 array
+const data_text = Buffer.from(chunk as Uint8Array).toString();
+// each response chunk contains one or more data chunks, which in turn are just json data
+const data_chunks = data_text.split('data: ');
+let data: OpenAICompletionResponse;
+for (const data_string of data_chunks) {
+data_string.trim();
+if (data_string.length < 2) {
+continue;
+}
+data = JSON.parse(data_string);
+//console.log(JSON.stringify(data));
+if (Object.hasOwn(data, 'detail') === true) {
+data = data as OpenAICompletionFailureResponse;
+// TODO: why did it error?
+throw new Error('OpenAI Endpoint Error');
+}
+// unpack the data
+data = data as OpenAICompletionSuccessResponse;
+for (const choice of data.choices) {
+ret.content += choice.text;
+updateStatusBarMessage(chunk_number, choice.text);
+chunk_number++;
+}
+ret.tokens += data.usage?.completion_tokens || 0;
+}
 }
 // stop the timer
 const timer_end = performance.now();
 ret.time = (timer_end - timer_start) / 1000.0;
 // clear the status bar item
-updateStatusBarMessage('');
+updateStatusBarMessage(0, '');
 } catch (e: any) {
 console.error(e);
 const err = e as TypeError;

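The new fetch handling treats every network chunk as text that may contain several SSE-style fragments, each prefixed with "data: ", and parses each fragment as JSON. A standalone sketch of that parsing step (hypothetical helper, not the commit's code); note that String.prototype.trim() returns a new string rather than trimming in place, so its result has to be captured:

// Hypothetical helper illustrating the "data: "-splitting approach used above.
function extractStreamedText(chunk: Uint8Array): string {
	const data_text = Buffer.from(chunk).toString();
	let text = '';
	for (const raw of data_text.split('data: ')) {
		const data_string = raw.trim(); // trim() returns the trimmed copy
		// skip empty fragments and the "[DONE]" sentinel some OpenAI-compatible servers send
		if (data_string.length < 2 || data_string === '[DONE]') {
			continue;
		}
		const data = JSON.parse(data_string);
		for (const choice of data.choices ?? []) {
			text += choice.text ?? '';
		}
	}
	return text;
}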