various fixes, better config, better fetch handling

master
Alessandro Mauri 5 months ago
parent 12f3c82d3e
commit 90477d164d
Signed by: alema
GPG Key ID: 2B7BF9531FF03BE8
Changed files:
  1. package.json (45)
  2. src/common.ts (5)
  3. src/extension.ts (21)
  4. src/llamacpp-api.ts (28)
  5. src/openai-api.ts (90)

package.json
@@ -141,60 +141,60 @@
"default": false,
"description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
},
"dumbpilot.llamaHost": {
"dumbpilot.endpoint": {
"type": "string",
"default": "http://0.0.0.0:8080",
"description": "llama.cpp server address"
},
"dumbpilot.llamaCtxsize": {
"dumbpilot.parameters.ContextSize": {
"type": "number",
"default": 2048
},
"dumbpilot.llamaMaxtokens": {
"dumbpilot.parameters.MaxTokens": {
"type": "number",
"default": -1
},
"dumbpilot.llamaMirostat": {
"dumbpilot.parameters.Mirostat": {
"type": "number",
"default": 0
},
"dumbpilot.llamaRepeatPenalty": {
"dumbpilot.parameters.RepeatPenalty": {
"type": "number",
"default": 1.11
},
"dumbpilot.llamaFrequencyPenalty": {
"dumbpilot.parameters.FrequencyPenalty": {
"type": "number",
"default": 0
},
"dumbpilot.llamaPresencePenalty": {
"dumbpilot.parameters.PresencePenalty": {
"type": "number",
"default": 0
},
"dumbpilot.llamaRepeatCtx": {
"dumbpilot.parameters.RepeatCtx": {
"type": "number",
"default": 256
},
"dumbpilot.llamaTemperature": {
"dumbpilot.parameters.Temperature": {
"type": "number",
"default": 0.25
},
"dumbpilot.llamaTop_p": {
"dumbpilot.parameters.Top_p": {
"type": "number",
"default": 0.95
},
"dumbpilot.llamaTop_k": {
"dumbpilot.parameters.Top_k": {
"type": "number",
"default": 40
},
"dumbpilot.llamaTypical_p": {
"dumbpilot.parameters.Typical_p": {
"type": "number",
"default": 0.95
},
"dumbpilot.llamaTailfree_z": {
"dumbpilot.parameters.Tailfree_z": {
"type": "number",
"default": 0.5
},
"dumbpilot.llamaSeed": {
"dumbpilot.parameters.Seed": {
"type": "number",
"default": -1
},
@@ -215,31 +215,32 @@
"default": false,
"description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
},
"dumbpilot.llamaCachePrompt": {
"dumbpilot.CachePrompt": {
"type": "boolean",
"default": true,
"description": "Enable prompt caching for faster results"
},
"dumbpilot.llamaInstructModel": {
"dumbpilot.model.InstructModel": {
"type": "boolean",
"default": false,
"description": "For use with instruct models"
},
"dumbpilot.llamaSystemPrompt": {
"dumbpilot.model.SystemPrompt": {
"type": "string",
"description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
},
"dumbpilot.llamaUseOpenAIAPI": {
"type": "boolean",
"default": true,
"dumbpilot.API": {
"type": "string",
"enum": ["llamacpp", "OpenAI"],
"default": "OpenAI",
"description": "Use the OpenAI API to make requests to the server instead of the llama.cpp server API"
},
"dumbpilot.llamaModelName": {
"dumbpilot.model.ModelName": {
"type": "string",
"default": "deepseek-coder-6.7B-base.gguf",
"description": "Name of the model to use, only works in OpenAI API mode"
},
"dumbpilot.llamaAPIStream": {
"dumbpilot.parameters.stream": {
"type": "boolean",
"default": false
}

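The configuration hunks above move the flat llama* keys into grouped names (dumbpilot.endpoint, dumbpilot.API, dumbpilot.parameters.*, dumbpilot.model.*, dumbpilot.CachePrompt). A minimal sketch of reading the renamed keys from extension code, assuming the usual vscode.workspace.getConfiguration('dumbpilot') section; the defaults mirror the values declared above:

import * as vscode from 'vscode';

// Read the 'dumbpilot' section once; the keys mirror the renamed entries
// in package.json ('parameters.*' and 'model.*' are nested key names).
const config = vscode.workspace.getConfiguration('dumbpilot');

const endpoint = config.get<string>('endpoint', 'http://0.0.0.0:8080');
const api = config.get<string>('API', 'OpenAI'); // 'llamacpp' or 'OpenAI'
const contextSize = config.get<number>('parameters.ContextSize', 2048);
const maxTokens = config.get<number>('parameters.MaxTokens', -1);
const temperature = config.get<number>('parameters.Temperature', 0.25);
const cachePrompt = config.get<boolean>('CachePrompt', true);
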
src/common.ts
@@ -60,7 +60,7 @@ export async function showPendingStatusBar(
let st_msg: vscode.StatusBarItem | undefined;
export function updateStatusBarMessage(text: string) {
export function updateStatusBarMessage(total: number, text: string) {
if (!st_msg) {
st_msg = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Left, -100);
}
@@ -68,9 +68,10 @@ export function updateStatusBarMessage(text: string) {
const run_color = new vscode.ThemeColor('statusBarItem.warningBackground');
if (text.length > 0) {
st_msg.backgroundColor = run_color;
st_msg.text = '$(megaphone) ' + text.trim();
st_msg.text = total + ' $(megaphone) ' + text.trim();
st_msg.show();
} else {
st_msg.text = '';
st_msg.hide();
}
}

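updateStatusBarMessage now takes a running total in addition to the message text; the streaming code further down uses it as a chunk counter and clears the item by passing an empty string. A small usage sketch, assuming the function is imported from './common' as in the diff:

import { updateStatusBarMessage } from './common';

// Report streaming progress: the first argument is shown before the
// megaphone icon, so a chunk counter gives a crude progress indicator.
let chunk_number = 1;
for (const piece of ['Hello', 'world']) {
    updateStatusBarMessage(chunk_number, piece); // e.g. "2 $(megaphone) world"
    chunk_number++;
}

// An empty string hides the status bar item again.
updateStatusBarMessage(0, '');
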
src/extension.ts
@@ -14,24 +14,11 @@ import {
openAIMakeRequest,
} from './openai-api';
import {
FetchErrorCause,
ResponseData,
showMessageWithTimeout,
showPendingStatusBar,
updateStatusBarMessage,
} from './common';
// clean up the document
function clean_text(txt: string): string {
// these are already done by JSON.stringify()
//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
//txt = txt.replace((/\t/gm, "\\t"));
// FIXME: I don't know if this penalizes some results since most people indent with spaces
//txt = txt.replace(/\s+/gm, " ");
return txt;
}
export function activate(context: vscode.ExtensionContext) {
console.log('dumbpilot is now active');
@@ -54,8 +41,6 @@ export function activate(context: vscode.ExtensionContext) {
config.update('completionEnabled', false);
});
updateStatusBarMessage('');
// Register a new provider of inline completions, this does not decide how it is invoked
// only what the completion should be
// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
@@ -90,10 +75,6 @@ export function activate(context: vscode.ExtensionContext) {
let doc_before = doc_text.substring(0, doc_off);
let doc_after = doc_text.substring(doc_off);
// make it cleaner in hope to reduce the number of tokens
doc_before = clean_text(doc_before);
doc_after = clean_text(doc_after);
// TODO: prune text up to a maximum context length
// Prefix the filename in a comment
@@ -109,7 +90,7 @@ export function activate(context: vscode.ExtensionContext) {
// actually make the request
let data: ResponseData = { content: '', tokens: 0, time: 0 };
let promise: Promise<ResponseData>;
if (config.get('llamaUseOpenAIAPI') === true) {
if (config.get('API') === 'OpenAI') {
const request: OpenAICompletionRequest = createOpenAIAPIRequest(
config,
doc_before,

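extension.ts now branches on the string-valued dumbpilot.API setting instead of the old llamaUseOpenAIAPI boolean. A hedged sketch of that dispatch, wrapped in a hypothetical requestCompletion helper and assuming createOpenAIAPIRequest takes (config, doc_before, doc_after) like its llama.cpp counterpart; the 'llamacpp' branch is only summarized because its request function is not part of this diff:

import * as vscode from 'vscode';
import { ResponseData } from './common';
import {
    createOpenAIAPIRequest,
    openAIAPIRequestEndpoint,
    openAIMakeRequest,
} from './openai-api';

async function requestCompletion(doc_before: string, doc_after: string): Promise<ResponseData> {
    const config = vscode.workspace.getConfiguration('dumbpilot');
    if (config.get('API') === 'OpenAI') {
        const request = createOpenAIAPIRequest(config, doc_before, doc_after);
        return openAIMakeRequest(request, openAIAPIRequestEndpoint(config));
    }
    // 'llamacpp': build the body with createLlamacppRequest() and POST it to
    // llamacppRequestEndpoint(config) from src/llamacpp-api.ts.
    return { content: '', tokens: 0, time: 0 };
}
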
src/llamacpp-api.ts
@@ -59,20 +59,20 @@ export function createLlamacppRequest(
doc_after: string
): LlamaRequest {
let request: LlamaRequest = {
n_predict: config.get('llamaMaxtokens') as number,
mirostat: config.get('llamaMirostat') as number,
repeat_penalty: config.get('llamaRepeatPenalty') as number,
frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
presence_penalty: config.get('llamaPresencePenalty,') as number,
repeat_last_n: config.get('llamaRepeatCtx,') as number,
temperature: config.get('llamaTemperature') as number,
top_p: config.get('llamaTop_p') as number,
top_k: config.get('llamaTop_k') as number,
typical_p: config.get('llamaTypical_p') as number,
tfs_z: config.get('llamaTailfree_z,') as number,
seed: config.get('llamaSeed') as number,
n_predict: config.get('parameters.MaxTokens') as number,
mirostat: config.get('parameters.Mirostat') as number,
repeat_penalty: config.get('parameters.RepeatPenalty') as number,
frequency_penalty: config.get('parameters.FrequencyPenalty') as number,
presence_penalty: config.get('parameters.PresencePenalty') as number,
repeat_last_n: config.get('parameters.RepeatCtx') as number,
temperature: config.get('parameters.Temperature') as number,
top_p: config.get('parameters.Top_p') as number,
top_k: config.get('parameters.Top_k') as number,
typical_p: config.get('parameters.Typical_p') as number,
tfs_z: config.get('parameters.Tailfree_z') as number,
seed: config.get('parameters.Seed') as number,
stream: false,
cache_prompt: config.get('llamaCachePrompt') as boolean,
cache_prompt: config.get('CachePrompt') as boolean,
};
const fim = config.get('fimEnabled') as boolean;
@@ -98,7 +98,7 @@ export function createLlamacppRequest(
export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
const fim = config.get('fimEnabled') as boolean;
const fimRequest = config.get('useFillInMiddleRequest') as boolean;
let req_str: string = config.get('llamaHost') as string;
let req_str: string = config.get('endpoint') as string;
if (fim === true && fimRequest === true) {
req_str += '/infill';

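For context, a sketch of the whole URL builder after the rename to dumbpilot.endpoint. Only the /infill branch is visible in the hunk above; the /completion fallback here is an assumption based on the llama.cpp server routes, not something shown in the diff:

import * as vscode from 'vscode';

function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
    const fim = config.get('fimEnabled') as boolean;
    const fimRequest = config.get('useFillInMiddleRequest') as boolean;
    let req_str: string = config.get('endpoint') as string;
    if (fim === true && fimRequest === true) {
        req_str += '/infill';
    } else {
        req_str += '/completion'; // assumed default route for non-FIM requests
    }
    return req_str;
}
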
src/openai-api.ts
@@ -3,10 +3,8 @@ import {
FetchErrorCause,
ResponseData,
showMessageWithTimeout,
showPendingStatusBar,
updateStatusBarMessage,
} from './common';
import { config } from 'process';
// oogabooga/text-generation-webui OpenAI compatible API
// https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
@@ -98,19 +96,19 @@ export function createOpenAIAPIRequest(
): OpenAICompletionRequest {
let request: OpenAICompletionRequest = {
prompt: '',
max_tokens: config.get('llamaMaxtokens') as number,
mirostat_mode: config.get('llamaMirostat') as number,
repetition_penalty: config.get('llamaRepeatPenalty') as number,
frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
presence_penalty: config.get('llamaPresencePenalty,') as number,
repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
temperature: config.get('llamaTemperature') as number,
top_p: config.get('llamaTop_p') as number,
top_k: config.get('llamaTop_k') as number,
typical_p: config.get('llamaTypical_p') as number,
tfs: config.get('llamaTailfree_z,') as number,
seed: config.get('llamaSeed') as number,
stream: config.get('llamaAPIStream'),
max_tokens: config.get('parameters.MaxTokens') as number,
mirostat_mode: config.get('parameters.Mirostat') as number,
repetition_penalty: config.get('parameters.RepeatPenalty') as number,
frequency_penalty: config.get('parameters.FrequencyPenalty') as number,
presence_penalty: config.get('parameters.PresencePenalty') as number,
repetition_penalty_range: config.get('parameters.RepeatCtx') as number,
temperature: config.get('parameters.Temperature') as number,
top_p: config.get('parameters.Top_p') as number,
top_k: config.get('parameters.Top_k') as number,
typical_p: config.get('parameters.Typical_p') as number,
tfs: config.get('parameters.Tailfree_z') as number,
seed: config.get('parameters.Seed') as number,
stream: config.get('parameters.stream') as boolean,
};
const fim = config.get('fimEnabled') as boolean;
@@ -127,11 +125,13 @@ export function createOpenAIAPIRequest(
return request;
}
// for now only completions is implemented
// for now only v1/completions is implemented
// TODO: implement chat
export function openAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
return (config.get('llamaHost') as string) + '/v1/completions';
return (config.get('endpoint') as string) + '/v1/completions';
}
// make a request and parse the incoming data
export async function openAIMakeRequest(
request_body: OpenAICompletionRequest,
endpoint: string
@@ -169,41 +169,45 @@ export async function openAIMakeRequest(
// start a timer
const timer_start = performance.now();
let chunk_number: number = 1;
for await (const chunk of response.body) {
// FIXME: why the fuck do I have to do this shite
let data_text = new TextDecoder().decode(chunk);
data_text = data_text.substring(data_text.indexOf('{'));
let data: OpenAICompletionResponse;
try {
data = JSON.parse(data_text);
} catch (e: any) {
console.error(e);
return ret;
}
//console.log(JSON.stringify(data));
// each chunk of data is a complete response in the form of a uint8 array
const data_text = Buffer.from(chunk as Uint8Array).toString();
if (Object.hasOwn(data, 'detail') === true) {
data = data as OpenAICompletionFailureResponse;
// TODO: why did it error?
throw new Error('OpenAI Endpoint Error');
// each response chunk contains one or more data chunks, which in turn are just json data
const data_chunks = data_text.split('data: ');
let data: OpenAICompletionResponse;
for (const data_string of data_chunks) {
const data_json = data_string.trim();
if (data_json.length < 2) {
continue;
}
data = JSON.parse(data_json);
//console.log(JSON.stringify(data));
if (Object.hasOwn(data, 'detail') === true) {
data = data as OpenAICompletionFailureResponse;
// TODO: why did it error?
throw new Error('OpenAI Endpoint Error');
}
// unpack the data
data = data as OpenAICompletionSuccessResponse;
for (const choice of data.choices) {
ret.content += choice.text;
updateStatusBarMessage(chunk_number, choice.text);
chunk_number++;
}
ret.tokens += data.usage?.completion_tokens || 0;
}
// unpack the data
data = data as OpenAICompletionSuccessResponse;
// FIXME: why the choices may be multiple?
// TODO: display the multiple choices
//console.log(data.choices[0].text);
updateStatusBarMessage(data.choices[0].text);
ret.content += data.choices[0].text;
ret.tokens += data.usage?.completion_tokens || 0;
}
// stop the timer
const timer_end = performance.now();
ret.time = (timer_end - timer_start) / 1000.0;
// clear the status bar item
updateStatusBarMessage('');
updateStatusBarMessage(0, '');
} catch (e: any) {
console.error(e);
const err = e as TypeError;

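The reworked streaming loop splits every network chunk on the 'data: ' prefix because one chunk may carry several SSE-style JSON payloads. A self-contained sketch of that parsing step (parseStreamChunk is a hypothetical helper, not a function from the extension), with a simplified response type; the '[DONE]' guard is an extra precaution for backends that send a terminating sentinel and is not in the original loop:

// Simplified response shape: a failure carries 'detail', a success carries
// 'choices' and optional 'usage'.
type OpenAICompletionResponse = {
    detail?: string;
    choices?: { text: string }[];
    usage?: { completion_tokens: number };
};

function parseStreamChunk(chunk: Uint8Array): OpenAICompletionResponse[] {
    // each chunk is a UTF-8 byte buffer holding one or more "data: {...}" segments
    const text = Buffer.from(chunk).toString();
    const parsed: OpenAICompletionResponse[] = [];
    for (const segment of text.split('data: ')) {
        const body = segment.trim(); // capture trim()'s result; strings are immutable
        if (body.length < 2 || body === '[DONE]') {
            continue; // skip empty fragments and a terminating sentinel
        }
        parsed.push(JSON.parse(body) as OpenAICompletionResponse);
    }
    return parsed;
}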