last chance before changing to OpenAI API

master
Alessandro Mauri 11 months ago
parent 04f8db150d
commit efb85c2cb4
  1. TODO.md (5 changes)
  2. package.json (86 changes)
  3. src/extension.ts (40 changes)

@@ -1,4 +1,4 @@
 [x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
 [x] - add config option to disable the extension
@@ -9,4 +9,5 @@
 [] - add an icon
 [] - option to backup and restore model settings
 [] - add a window to quickly modify model configs
 [] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
+[] - when trying to use completion when there is an active selection either substitute the selection or use the selection as context instead of the whole file
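
The new item above suggests using an active selection as completion context; a minimal sketch of how that could look with the standard vscode API (the helper name and its wiring are hypothetical, not part of this commit):

```ts
import * as vscode from 'vscode';

// hypothetical helper: prefer the user's selection as context when one exists
function getCompletionContext(document: vscode.TextDocument, position: vscode.Position): string {
    const editor = vscode.window.activeTextEditor;
    if (editor && !editor.selection.isEmpty) {
        // a non-empty selection becomes the whole context
        return document.getText(editor.selection);
    }
    // otherwise fall back to everything up to the cursor
    return document.getText(new vscode.Range(new vscode.Position(0, 0), position));
}
```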

@@ -137,23 +137,79 @@
   "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
 },
 "dumbpilot.llamaHost": {
   "type": "string",
   "default": "http://0.0.0.0:8080",
   "description": "llama.cpp server address"
 },
-"dumbpilot.llamaCtxsize": {"type": "number", "default": 2048},
-"dumbpilot.llamaMaxtokens": {"type": "number", "default": -1},
-"dumbpilot.llamaMirostat": {"type": "number", "default": 0},
-"dumbpilot.llamaRepeatPenalty": {"type": "number", "default": 1.11},
-"dumbpilot.llamaFrequencyPenalty": {"type": "number", "default": 0.0},
-"dumbpilot.llamaPresencePenalty": {"type": "number", "default": 0.0},
-"dumbpilot.llamaRepeatCtx": {"type": "number", "default": 256},
-"dumbpilot.llamaTemperature": {"type": "number", "default": 0.25},
-"dumbpilot.llamaTop_p": {"type": "number", "default": 0.95},
-"dumbpilot.llamaTop_k": {"type": "number", "default": 40},
-"dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
-"dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-"dumbpilot.llamaSeed": {"type": "number", "default": -1},
+"dumbpilot.llamaCtxsize": {
+  "type": "number",
+  "default": 2048
+},
+"dumbpilot.llamaMaxtokens": {
+  "type": "number",
+  "default": -1
+},
+"dumbpilot.llamaMirostat": {
+  "type": "number",
+  "default": 0
+},
+"dumbpilot.llamaRepeatPenalty": {
+  "type": "number",
+  "default": 1.11
+},
+"dumbpilot.llamaFrequencyPenalty": {
+  "type": "number",
+  "default": 0.0
+},
+"dumbpilot.llamaPresencePenalty": {
+  "type": "number",
+  "default": 0.0
+},
+"dumbpilot.llamaRepeatCtx": {
+  "type": "number",
+  "default": 256
+},
+"dumbpilot.llamaTemperature": {
+  "type": "number",
+  "default": 0.25
+},
+"dumbpilot.llamaTop_p": {
+  "type": "number",
+  "default": 0.95
+},
+"dumbpilot.llamaTop_k": {
+  "type": "number",
+  "default": 40
+},
+"dumbpilot.llamaTypical_p": {
+  "type": "number",
+  "default": 0.95
+},
+"dumbpilot.llamaTailfree_z": {
+  "type": "number",
+  "default": 0.5
+},
+"dumbpilot.llamaSeed": {
+  "type": "number",
+  "default": -1
+},
+"dumbpilot.fimBeginString": {
+  "type": "string",
+  "default": "<|fim▁begin|>"
+},
+"dumbpilot.fimHoleString": {
+  "type": "string",
+  "default": "<|fim▁hole|>"
+},
+"dumbpilot.fimEndString": {
+  "type": "string",
+  "default": "<|fim▁end|>"
+},
+"dumbpilot.useFillInMiddleRequest": {
+  "type": "boolean",
+  "default": false,
+  "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
+},
 "dumbpilot.llamaCachePrompt": {
   "type": "boolean",
   "default": true,
@@ -161,7 +217,7 @@
 },
 "dumbpilot.llamaInstructModel": {
   "type": "boolean",
-  "default": "false",
+  "default": false,
   "description": "For use with instruct models"
 },
 "dumbpilot.llamaSystemPrompt": {

@@ -33,14 +33,14 @@ type llamaRequest = {
   n_predict: number,
   mirostat: number,
   repeat_penalty: number,
   frequency_penalty: number,
   presence_penalty: number,
   repeat_last_n: number,
   temperature: number,
   top_p: number,
   top_k: number,
   typical_p: number,
   tfs_z: number,
   seed: number,
   stream: boolean,
   cache_prompt: boolean,
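
The hunk shows only part of the type. Judging from how request.prompt, request.input_prefix, and request.input_suffix are assigned later in this diff, the full shape is presumably close to this sketch (the optional fields are an assumption):

```ts
// reconstructed sketch, not the verbatim source
type llamaRequest = {
    n_predict: number,
    mirostat: number,
    repeat_penalty: number,
    frequency_penalty: number,
    presence_penalty: number,
    repeat_last_n: number,
    temperature: number,
    top_p: number,
    top_k: number,
    typical_p: number,
    tfs_z: number,
    seed: number,
    stream: boolean,
    cache_prompt: boolean,
    // assumed optional: only one of prompt or prefix/suffix is set per request
    prompt?: string,
    input_prefix?: string,
    input_suffix?: string,
};
```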
@@ -63,7 +63,7 @@ function clean_text(txt: string): string {
   // these are already done by JSON.stringify()
   //txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
   //txt = txt.replace((/\t/gm, "\\t"));
   // FIXME: I don't know if this penalizes some results since most people indent with spaces
   //txt = txt.replace(/\s+/gm, " ");
   return txt;
 }
@@ -131,7 +131,7 @@ export function activate(context: vscode.ExtensionContext) {
   if (config.get("completionEnabled") as boolean === false) {
     return null;
   }

   // Since for every completion we will query the server, we want to filter out
   // automatic completion invokes
   if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
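
For orientation, this check runs inside an inline completion provider. Registering one looks roughly like the following sketch; the provider body here is a stand-in for the real one in extension.ts:

```ts
import * as vscode from 'vscode';

export function activate(context: vscode.ExtensionContext) {
    const provider: vscode.InlineCompletionItemProvider = {
        async provideInlineCompletionItems(document, position, ctx, token) {
            // skip automatic invocations, mirroring the check above
            if (ctx.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
                return null;
            }
            return { items: [] };
        },
    };
    // '**' registers the provider for all documents
    context.subscriptions.push(
        vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, provider)
    );
}
```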
@@ -155,7 +155,7 @@ export function activate(context: vscode.ExtensionContext) {
   const doc_off = document.offsetAt(position);
   let doc_before = doc_text.substring(0, doc_off);
   let doc_after = doc_text.substring(doc_off);

   // make it cleaner in hope to reduce the number of tokens
   doc_before = clean_text(doc_before);
   doc_after = clean_text(doc_after);
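
offsetAt turns the cursor position into a flat index into the document text, so the two substrings are simply everything before and everything after the cursor:

```ts
// illustration with plain strings
const doc_text = 'hello world';
const doc_off = 6; // what document.offsetAt(position) would return here
const doc_before = doc_text.substring(0, doc_off); // 'hello '
const doc_after = doc_text.substring(doc_off);     // 'world'
```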
@@ -173,6 +173,7 @@ export function activate(context: vscode.ExtensionContext) {
   doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;

   const fim = config.get("fimEnabled") as boolean;
+  const fimRequest = config.get("useFillInMiddleRequest") as boolean;
   let req_str: string;
   let request: llamaRequest = {
     n_predict: config.get("llamaMaxtokens") as number,
@@ -190,16 +191,27 @@ export function activate(context: vscode.ExtensionContext) {
     stream: false,
     cache_prompt: config.get("llamaCachePrompt") as boolean
   };

+  // check if fill in middle is enabled and fill the request prompt accordingly
   if (fim === true) {
-    req_str = '/infill';
-    request.input_prefix = doc_before;
-    request.input_suffix = doc_after;
+    if (fimRequest === true) {
+      req_str = '/infill';
+      request.input_prefix = doc_before;
+      request.input_suffix = doc_after;
+    } else {
+      const fim_beg = config.get("fimBeginString") as string;
+      const fim_hole = config.get("fimHoleString") as string;
+      const fim_end = config.get("fimEndString") as string;
+      req_str = '/completion';
+      request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+    }
   } else {
     req_str = '/completion';
     request.prompt = doc_before;
   }
+
+  console.log(JSON.stringify(request));

   let data: llamaData;
   // try to send the request to the running server
   try {
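
With the default FIM strings from package.json, the two branches above produce request bodies along these lines (illustration only, with a made-up cursor split):

```ts
const doc_before = 'function add(a: number, b: number) {\n\treturn ';
const doc_after = ';\n}';

// useFillInMiddleRequest === true: POST to /infill with an explicit prefix and suffix
const infillBody = { input_prefix: doc_before, input_suffix: doc_after };

// useFillInMiddleRequest === false: POST to /completion with the FIM tokens spliced in
const completionBody = {
    prompt: '<|fim▁begin|>' + doc_before + '<|fim▁hole|>' + doc_after + '<|fim▁end|>',
};
```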
@@ -219,7 +231,7 @@ export function activate(context: vscode.ExtensionContext) {
   if (response.ok === false) {
     throw new Error("llama server request is not ok??");
   }

   data = await response.json() as llamaData;
   const gen_tokens = data.timings.predicted_n;
   const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
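
The fields read here imply a llamaData response type roughly like the following sketch; only the fields this diff actually touches are included, anything else llama.cpp returns is omitted:

```ts
// partial sketch, reconstructed from usage
type llamaData = {
    content: string, // the generated completion text
    timings: {
        predicted_n: number,  // tokens generated
        predicted_ms: number, // generation time in milliseconds
    },
};
```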
@@ -234,7 +246,7 @@ export function activate(context: vscode.ExtensionContext) {
     showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
     return null;
   };

   result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
   return result;
 },
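
For context, result here is the inline completion list the provider returns; a minimal sketch of the shape being built, assuming data.content holds the generated text:

```ts
import * as vscode from 'vscode';

function buildResult(content: string, position: vscode.Position): vscode.InlineCompletionList {
    const result: vscode.InlineCompletionList = { items: [] };
    // an empty range at the cursor inserts text without replacing anything
    result.items.push({ insertText: content, range: new vscode.Range(position, position) });
    return result;
}
```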
