various fixes, better config, better fetch handling
This commit is contained in:
parent
12f3c82d3e
commit
90477d164d
package.json (45 changed lines)
@@ -141,60 +141,60 @@
       "default": false,
       "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
     },
-    "dumbpilot.llamaHost": {
+    "dumbpilot.endpoint": {
       "type": "string",
       "default": "http://0.0.0.0:8080",
       "description": "llama.cpp server address"
     },
-    "dumbpilot.llamaCtxsize": {
+    "dumbpilot.parameters.ContextSize": {
       "type": "number",
       "default": 2048
     },
-    "dumbpilot.llamaMaxtokens": {
+    "dumbpilot.parameters.MaxTokens": {
       "type": "number",
       "default": -1
     },
-    "dumbpilot.llamaMirostat": {
+    "dumbpilot.parameters.Mirostat": {
       "type": "number",
       "default": 0
     },
-    "dumbpilot.llamaRepeatPenalty": {
+    "dumbpilot.parameters.RepeatPenalty": {
       "type": "number",
       "default": 1.11
     },
-    "dumbpilot.llamaFrequencyPenalty": {
+    "dumbpilot.parameters.FrequencyPenalty": {
       "type": "number",
       "default": 0
     },
-    "dumbpilot.llamaPresencePenalty": {
+    "dumbpilot.parameters.PresencePenalty": {
       "type": "number",
       "default": 0
     },
-    "dumbpilot.llamaRepeatCtx": {
+    "dumbpilot.parameters.RepeatCtx": {
       "type": "number",
       "default": 256
     },
-    "dumbpilot.llamaTemperature": {
+    "dumbpilot.parameters.Temperature": {
       "type": "number",
       "default": 0.25
     },
-    "dumbpilot.llamaTop_p": {
+    "dumbpilot.parameters.Top_p": {
       "type": "number",
       "default": 0.95
     },
-    "dumbpilot.llamaTop_k": {
+    "dumbpilot.parameters.Top_k": {
       "type": "number",
       "default": 40
     },
-    "dumbpilot.llamaTypical_p": {
+    "dumbpilot.parameters.Typical_p": {
       "type": "number",
       "default": 0.95
     },
-    "dumbpilot.llamaTailfree_z": {
+    "dumbpilot.parameters.Tailfree_z": {
       "type": "number",
       "default": 0.5
     },
-    "dumbpilot.llamaSeed": {
+    "dumbpilot.parameters.Seed": {
       "type": "number",
       "default": -1
     },
@@ -215,31 +215,32 @@
       "default": false,
       "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
     },
-    "dumbpilot.llamaCachePrompt": {
+    "dumbpilot.CachePrompt": {
       "type": "boolean",
       "default": true,
       "description": "Enable prompt caching for faster results"
     },
-    "dumbpilot.llamaInstructModel": {
+    "dumbpilot.model.InstructModel": {
       "type": "boolean",
       "default": false,
       "description": "For use with instruct models"
     },
-    "dumbpilot.llamaSystemPrompt": {
+    "dumbpilot.model.SystemPrompt": {
       "type": "string",
       "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
     },
-    "dumbpilot.llamaUseOpenAIAPI": {
-      "type": "boolean",
-      "default": true,
+    "dumbpilot.API": {
+      "type": "string",
+      "enum": ["llamacpp", "OpenAI"],
+      "default": "OpenAI",
       "description": "Use the OpenAI API to make requests to the server instead of the llama.cpp server API"
     },
-    "dumbpilot.llamaModelName": {
+    "dumbpilot.model.ModelName": {
       "type": "string",
       "default": "deepseek-coder-6.7B-base.gguf",
       "description": "Name of the model to use, only works in OpenAI API mode"
     },
-    "dumbpilot.llamaAPIStream": {
+    "dumbpilot.parameters.stream": {
       "type": "boolean",
       "default": false
     }
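For reference, a minimal sketch (not part of this commit) of how the renamed settings would be read from extension code. It uses only the standard VS Code configuration API and the key names declared above; the variable names are illustrative.

import * as vscode from 'vscode';

// Settings live under the "dumbpilot" section; keys mirror the contribution points above.
const config = vscode.workspace.getConfiguration('dumbpilot');

const endpoint = config.get<string>('endpoint', 'http://0.0.0.0:8080');
const maxTokens = config.get<number>('parameters.MaxTokens', -1);
const api = config.get<string>('API', 'OpenAI'); // "llamacpp" or "OpenAI"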
@@ -60,7 +60,7 @@ export async function showPendingStatusBar(

 let st_msg: vscode.StatusBarItem | undefined;

-export function updateStatusBarMessage(text: string) {
+export function updateStatusBarMessage(total: number, text: string) {
 	if (!st_msg) {
 		st_msg = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Left, -100);
 	}
@@ -68,9 +68,10 @@ export function updateStatusBarMessage(text: string) {
 	const run_color = new vscode.ThemeColor('statusBarItem.warningBackground');
 	if (text.length > 0) {
 		st_msg.backgroundColor = run_color;
-		st_msg.text = '$(megaphone) ' + text.trim();
+		st_msg.text = total + ' $(megaphone) ' + text.trim();
 		st_msg.show();
 	} else {
 		st_msg.text = '';
 		st_msg.hide();
 	}
 }
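A hedged usage sketch of the new signature (the caller below is hypothetical, assuming updateStatusBarMessage is imported from './common'): the first argument is a running counter that gets prefixed to the status bar text, and an empty string still clears and hides the item.

// Hypothetical caller: show progress for each streamed chunk.
let chunk_number = 1;
updateStatusBarMessage(chunk_number, 'completion chunk received');
chunk_number++;

// Passing an empty string clears and hides the status bar item.
updateStatusBarMessage(0, '');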
@@ -14,24 +14,11 @@ import {
 	openAIMakeRequest,
 } from './openai-api';
 import {
 	FetchErrorCause,
 	ResponseData,
 	showMessageWithTimeout,
 	showPendingStatusBar,
 	updateStatusBarMessage,
 } from './common';

-// clean up the document
-function clean_text(txt: string): string {
-	// these are already done by JSON.stringify()
-	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
-	//txt = txt.replace((/\t/gm, "\\t"));
-
-	// FIXME: I don't know if this penalizes some results since most people indent with spaces
-	//txt = txt.replace(/\s+/gm, " ");
-	return txt;
-}
-
 export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');

@@ -54,8 +41,6 @@ export function activate(context: vscode.ExtensionContext) {
 		config.update('completionEnabled', false);
 	});

-	updateStatusBarMessage('');
-
 	// Register a new provider of inline completions, this does not decide how it is invoked
 	// only what the completion should be
 	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
@@ -90,10 +75,6 @@ export function activate(context: vscode.ExtensionContext) {
 			let doc_before = doc_text.substring(0, doc_off);
 			let doc_after = doc_text.substring(doc_off);

-			// make it cleaner in hope to reduce the number of tokens
-			doc_before = clean_text(doc_before);
-			doc_after = clean_text(doc_after);
-
 			// TODO: prune text up to a maximum context length

 			// Prefix the filename in a comment
@@ -109,7 +90,7 @@ export function activate(context: vscode.ExtensionContext) {
 			// actially make the request
 			let data: ResponseData = { content: '', tokens: 0, time: 0 };
 			let promise: Promise<ResponseData>;
-			if (config.get('llamaUseOpenAIAPI') === true) {
+			if (config.get('API') === 'OpenAI') {
 				const request: OpenAICompletionRequest = createOpenAIAPIRequest(
 					config,
 					doc_before,
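Because dumbpilot.API is now a string enum rather than a boolean, request dispatch compares against the enum value. A minimal sketch of that branch, assuming the same config object and the request builders named above:

// Dispatch on the string-valued "dumbpilot.API" setting ("llamacpp" or "OpenAI").
const api = config.get('API') as string;
if (api === 'OpenAI') {
	// build and send an OpenAI-style completion request (createOpenAIAPIRequest / openAIMakeRequest)
} else {
	// fall back to the native llama.cpp server API (createLlamacppRequest)
}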
@@ -59,20 +59,20 @@ export function createLlamacppRequest(
 	doc_after: string
 ): LlamaRequest {
 	let request: LlamaRequest = {
-		n_predict: config.get('llamaMaxtokens') as number,
-		mirostat: config.get('llamaMirostat') as number,
-		repeat_penalty: config.get('llamaRepeatPenalty') as number,
-		frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-		presence_penalty: config.get('llamaPresencePenalty,') as number,
-		repeat_last_n: config.get('llamaRepeatCtx,') as number,
-		temperature: config.get('llamaTemperature') as number,
-		top_p: config.get('llamaTop_p') as number,
-		top_k: config.get('llamaTop_k') as number,
-		typical_p: config.get('llamaTypical_p') as number,
-		tfs_z: config.get('llamaTailfree_z,') as number,
-		seed: config.get('llamaSeed') as number,
+		n_predict: config.get('parameters.MaxTokens') as number,
+		mirostat: config.get('parameters.Mirostat') as number,
+		repeat_penalty: config.get('parameters.RepeatPenalty') as number,
+		frequency_penalty: config.get('parameters.FrequencyPenalty,') as number,
+		presence_penalty: config.get('parameters.PresencePenalty,') as number,
+		repeat_last_n: config.get('parameters.RepeatCtx,') as number,
+		temperature: config.get('parameters.Temperature') as number,
+		top_p: config.get('parameters.Top_p') as number,
+		top_k: config.get('parameters.Top_k') as number,
+		typical_p: config.get('parameters.Typical_p') as number,
+		tfs_z: config.get('parameters.Tailfree_z,') as number,
+		seed: config.get('parameters.Seed') as number,
 		stream: false,
-		cache_prompt: config.get('llamaCachePrompt') as boolean,
+		cache_prompt: config.get('CachePrompt') as boolean,
 	};

 	const fim = config.get('fimEnabled') as boolean;
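Note that several of the new keys still carry a stray comma inside the string (for example 'parameters.FrequencyPenalty,' and 'parameters.Tailfree_z,'); no such keys are declared in package.json above, so those config.get calls would return undefined. A hedged sketch with the keys as declared:

// Keys exactly as declared in package.json; the trailing-comma variants would not resolve.
const frequency_penalty = config.get('parameters.FrequencyPenalty') as number;
const presence_penalty = config.get('parameters.PresencePenalty') as number;
const repeat_last_n = config.get('parameters.RepeatCtx') as number;
const tfs_z = config.get('parameters.Tailfree_z') as number;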
@@ -98,7 +98,7 @@ export function createLlamacppRequest(
 export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
 	const fim = config.get('fimEnabled') as boolean;
 	const fimRequest = config.get('useFillInMiddleRequest') as boolean;
-	let req_str: string = config.get('llamaHost') as string;
+	let req_str: string = config.get('endpoint') as string;

 	if (fim === true && fimRequest === true) {
 		req_str += '/infill';
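For illustration, a sketch of how this helper composes the request URL from the renamed dumbpilot.endpoint setting; the hunk is truncated, so the non-FIM '/completion' route below is an assumption based on the llama.cpp server API rather than the commit itself.

// Sketch: choose the llama.cpp server route from the FIM settings (assumed shape).
let req_str: string = config.get('endpoint') as string; // e.g. "http://0.0.0.0:8080"
if (fim === true && fimRequest === true) {
	req_str += '/infill';
} else {
	req_str += '/completion'; // assumed non-FIM route
}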
@@ -3,10 +3,8 @@ import {
 	FetchErrorCause,
 	ResponseData,
 	showMessageWithTimeout,
 	showPendingStatusBar,
 	updateStatusBarMessage,
 } from './common';
 import { config } from 'process';

 // oogabooga/text-generation-webui OpenAI compatible API
 // https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
@@ -98,19 +96,19 @@ export function createOpenAIAPIRequest(
 ): OpenAICompletionRequest {
 	let request: OpenAICompletionRequest = {
 		prompt: '',
-		max_tokens: config.get('llamaMaxtokens') as number,
-		mirostat_mode: config.get('llamaMirostat') as number,
-		repetition_penalty: config.get('llamaRepeatPenalty') as number,
-		frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-		presence_penalty: config.get('llamaPresencePenalty,') as number,
-		repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
-		temperature: config.get('llamaTemperature') as number,
-		top_p: config.get('llamaTop_p') as number,
-		top_k: config.get('llamaTop_k') as number,
-		typical_p: config.get('llamaTypical_p') as number,
-		tfs: config.get('llamaTailfree_z,') as number,
-		seed: config.get('llamaSeed') as number,
-		stream: config.get('llamaAPIStream'),
+		max_tokens: config.get('parameters.MaxTokens') as number,
+		mirostat_mode: config.get('parameters.Mirostat') as number,
+		repetition_penalty: config.get('parameters.RepeatPenalty') as number,
+		frequency_penalty: config.get('parameters.FrequencyPenalty,') as number,
+		presence_penalty: config.get('parameters.PresencePenalty,') as number,
+		repetition_penalty_range: config.get('parameters.RepeatCtx,') as number,
+		temperature: config.get('parameters.Temperature') as number,
+		top_p: config.get('parameters.Top_p') as number,
+		top_k: config.get('parameters.Top_k') as number,
+		typical_p: config.get('parameters.Typical_p') as number,
+		tfs: config.get('parameters.Tailfree_z,') as number,
+		seed: config.get('parameters.Seed') as number,
+		stream: config.get('parameters.stream') as boolean,
 	};

 	const fim = config.get('fimEnabled') as boolean;
@@ -127,11 +125,13 @@ export function createOpenAIAPIRequest(
 	return request;
 }

-// for now only completions is implemented
+// for now only vv1/completions is implemented
 // TODO: implement chat
 export function openAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
-	return (config.get('llamaHost') as string) + '/v1/completions';
+	return (config.get('endpoint') as string) + '/v1/completions';
 }

 // make a request and parse the incoming data
 export async function openAIMakeRequest(
 	request_body: OpenAICompletionRequest,
 	endpoint: string
@@ -169,18 +169,21 @@ export async function openAIMakeRequest(
 		// start a timer
 		const timer_start = performance.now();

+		let chunk_number: number = 1;
 		for await (const chunk of response.body) {
-			// FIXME: why the fuck do I have to do this shite
-			let data_text = new TextDecoder().decode(chunk);
-			data_text = data_text.substring(data_text.indexOf('{'));
-			let data: OpenAICompletionResponse;
+			// each chunk of data is a complete response in the form of a uint8 array
+			const data_text = Buffer.from(chunk as Uint8Array).toString();

 			try {
-				data = JSON.parse(data_text);
-			} catch (e: any) {
-				console.error(e);
-				return ret;
+				// each response chunk contains one or more data chunks, which in turn are just json data
+				const data_chunks = data_text.split('data: ');
+				let data: OpenAICompletionResponse;
+				for (const data_string of data_chunks) {
+					data_string.trim();
+					if (data_string.length < 2) {
+						continue;
+					}
+
+					data = JSON.parse(data_string);
 					//console.log(JSON.stringify(data));

 					if (Object.hasOwn(data, 'detail') === true) {
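The new loop assumes each network chunk may bundle several server-sent-event style records, each prefixed with "data: " and carrying a JSON payload (note that data_string.trim() in the diff discards its result, so the untrimmed string is what actually gets parsed). A self-contained sketch of that parsing, assuming the OpenAI completion response shape used above:

// Split one streamed chunk into "data: "-prefixed records and accumulate the parsed text.
function parseStreamChunk(chunk: Uint8Array): { text: string; tokens: number } {
	const out = { text: '', tokens: 0 };
	const data_text = Buffer.from(chunk).toString();
	for (const part of data_text.split('data: ')) {
		const record = part.trim();
		if (record.length < 2 || record === '[DONE]') {
			continue; // skip empty fragments and a possible end-of-stream marker
		}
		const data = JSON.parse(record);
		for (const choice of data.choices ?? []) {
			out.text += choice.text;
		}
		out.tokens += data.usage?.completion_tokens || 0;
	}
	return out;
}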
@@ -188,22 +191,23 @@ export async function openAIMakeRequest(
 					// TODO: why did it error?
 					throw new Error('OpenAI Endpoint Error');
 				}

 				// unpack the data
 				data = data as OpenAICompletionSuccessResponse;
-				// FIXME: why the choices may be multiple?
-				// TODO: display the multiple choices
-				//console.log(data.choices[0].text);
-				updateStatusBarMessage(data.choices[0].text);
-				ret.content += data.choices[0].text;
+				for (const choice of data.choices) {
+					ret.content += choice.text;
+					updateStatusBarMessage(chunk_number, choice.text);
+					chunk_number++;
+				}
 				ret.tokens += data.usage?.completion_tokens || 0;
 			}
 		}

 		// stop the timer
 		const timer_end = performance.now();
 		ret.time = (timer_end - timer_start) / 1000.0;
 		// clear the status bar item
-		updateStatusBarMessage('');
+		updateStatusBarMessage(0, '');
 	} catch (e: any) {
 		console.error(e);
 		const err = e as TypeError;