Compare commits

No commits in common. "04f8db150deae3a9cdd2c8940f9d59f96b58a359" and "00e22e83583381304a2c7ea3973d2787528b6abd" have entirely different histories.

4 changed files with 69 additions and 153 deletions

.vscode/launch.json

@@ -23,7 +23,7 @@
       "request": "launch",
       "args": [
         "--extensionDevelopmentPath=${workspaceFolder}",
-        "--extensionTestsPath=${workspaceFolder}/out/test/"
+        "--extensionTestsPath=${workspaceFolder}/out/test/suite/index"
       ],
       "outFiles": [
         "${workspaceFolder}/out/test/**/*.js"

TODO.md

@@ -1,12 +1,6 @@
 [x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
-[x] - add config option to disable the extension
+[] - add config option to disable the extension
 [] - add command to test and query connection to server
-[x] - add feedback when waiting response
-[] - add a chat window
-[] - if the model is an instruct-type add the system prompt to the chat
-[] - add an icon
-[] - option to backup and restore model settings
-[] - add a window to quickly modify model configs
-[] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
+[] - add feedback when waiting response
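The last removed item links to the official decorator sample. For illustration only (none of this exists in either commit), decorating AI-generated text boils down to a decoration type plus `setDecorations`:

```ts
import * as vscode from 'vscode';

// Hypothetical sketch in the spirit of the linked decorator-sample:
// give AI-generated ranges a subtle background tint.
const aiDecoration = vscode.window.createTextEditorDecorationType({
	backgroundColor: new vscode.ThemeColor('editor.inactiveSelectionBackground'),
});

function markGenerated(editor: vscode.TextEditor, ranges: vscode.Range[]): void {
	editor.setDecorations(aiDecoration, ranges);
}
```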

package.json

@@ -122,54 +122,7 @@
         "when": "editorTextFocus",
         "command": "editor.action.inlineSuggest.trigger"
       }
-    ],
-    "configuration": {
-      "title": "dumbpilot",
-      "properties": {
-        "dumbpilot.completionEnabled": {
-          "type": "boolean",
-          "default": true,
-          "description": "Enable predictive code completion"
-        },
-        "dumbpilot.fimEnabled": {
-          "type": "boolean",
-          "default": false,
-          "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
-        },
-        "dumbpilot.llamaHost": {
-          "type": "string",
-          "default": "http://0.0.0.0:8080",
-          "description": "llama.cpp server address"
-        },
-        "dumbpilot.llamaCtxsize": { "type": "number", "default": 2048 },
-        "dumbpilot.llamaMaxtokens": { "type": "number", "default": -1 },
-        "dumbpilot.llamaMirostat": { "type": "number", "default": 0 },
-        "dumbpilot.llamaRepeatPenalty": { "type": "number", "default": 1.11 },
-        "dumbpilot.llamaFrequencyPenalty": { "type": "number", "default": 0.0 },
-        "dumbpilot.llamaPresencePenalty": { "type": "number", "default": 0.0 },
-        "dumbpilot.llamaRepeatCtx": { "type": "number", "default": 256 },
-        "dumbpilot.llamaTemperature": { "type": "number", "default": 0.25 },
-        "dumbpilot.llamaTop_p": { "type": "number", "default": 0.95 },
-        "dumbpilot.llamaTop_k": { "type": "number", "default": 40 },
-        "dumbpilot.llamaTypical_p": { "type": "number", "default": 0.95 },
-        "dumbpilot.llamaTailfree_z": { "type": "number", "default": 0.5 },
-        "dumbpilot.llamaSeed": { "type": "number", "default": -1 },
-        "dumbpilot.llamaCachePrompt": {
-          "type": "boolean",
-          "default": true,
-          "description": "Enable prompt caching for faster results"
-        },
-        "dumbpilot.llamaInstructModel": {
-          "type": "boolean",
-          "default": false,
-          "description": "For use with instruct models"
-        },
-        "dumbpilot.llamaSystemPrompt": {
-          "type": "string",
-          "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
-        }
-      }
-    }
+    ]
     },
     "scripts": {
       "vscode:prepublish": "npm run compile",

src/extension.ts

@@ -29,7 +29,7 @@ type llamaData = {
 	truncated: boolean
 };

-type llamaRequest = {
+type llamaCompletionRequest = {
 	n_predict: number,
 	mirostat: number,
 	repeat_penalty: number,
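`llamaData` models the llama.cpp server's completion response; only `truncated` is visible in this hunk. A hedged reconstruction limited to the fields this diff actually touches (`content` is assumed from the standard llama.cpp response; other fields omitted):

```ts
type llamaData = {
	content: string,          // generated text (assumed, not shown in the hunk)
	truncated: boolean,
	timings: {
		predicted_n: number,  // tokens generated — read further down in this diff
		predicted_ms: number, // generation time in milliseconds
	},
};
```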
@@ -43,21 +43,45 @@ type llamaRequest = {
 	tfs_z: number,
 	seed: number,
 	stream: boolean,
-	cache_prompt: boolean,
-	prompt?: string,
-	input_prefix?: string,
-	input_suffix?: string
+	prompt: string,
 };

-type fetchErrorCause = {
-	errno: number,
-	code: string,
-	syscall: string,
-	address: string,
-	port: number
+type llamaFillRequest = {
+	n_predict: number,
+	mirostat: number,
+	repeat_penalty: number,
+	frequency_penalty: number,
+	presence_penalty: number,
+	repeat_last_n: number,
+	temperature: number,
+	top_p: number,
+	top_k: number,
+	typical_p: number,
+	tfs_z: number,
+	seed: number,
+	stream: boolean,
+	input_prefix: string,
+	input_suffix: string,
 };

+const llama_ctxsize = 2048;
+const llama_maxtokens = -1;
+const llama_mirostat = 0;
+const llama_repeat_penalty = 1.11;
+const llama_frequency_penalty = 0.0;
+const llama_presence_penalty = 0.0;
+const llama_repeat_ctx = 256;
+const llama_temperature = 0.25;
+const llama_top_p = 0.95;
+const llama_top_k = 40;
+const llama_typical_p = 0.95;
+const llama_tailfree_z = 0.5;
+const llama_session_seed = -1;
+const llama_host = "http://0.0.0.0:8080";
+
 // clean up the document
 function clean_text(txt: string): string {
 	// these are already done by JSON.stringify()
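The two request types mirror llama.cpp's two generation endpoints: `/completion` consumes a `prompt`, while `/infill` consumes `input_prefix` and `input_suffix` for fill-in-the-middle. A sketch of how the bodies differ, reusing the constants above (`doc_before`/`doc_after` are the text around the cursor, as in the provider further down; the `/infill` branch is hypothetical on this side of the compare):

```ts
// Sampling parameters shared by both endpoints, from the constants above.
const common = {
	n_predict: llama_maxtokens,
	mirostat: llama_mirostat,
	repeat_penalty: llama_repeat_penalty,
	frequency_penalty: llama_frequency_penalty,
	presence_penalty: llama_presence_penalty,
	repeat_last_n: llama_repeat_ctx,
	temperature: llama_temperature,
	top_p: llama_top_p,
	top_k: llama_top_k,
	typical_p: llama_typical_p,
	tfs_z: llama_tailfree_z,
	seed: llama_session_seed,
	stream: false,
};

// POST to llama_host + '/completion': continue the text before the cursor.
const completion: llamaCompletionRequest = { ...common, prompt: doc_before };

// POST to llama_host + '/infill': fill between prefix and suffix.
const fill: llamaFillRequest = { ...common, input_prefix: doc_before, input_suffix: doc_after };
```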
@@ -70,54 +94,22 @@ function clean_text(txt: string): string {
 }

-// Show a message notification with a set timeout
-async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Notification,
-			title: message,
-			cancellable: false,
-		}, (progress, token) => {
-			token.onCancellationRequested(() => {});
-			// This is magic I don't understand
-			const p = new Promise<void>((resolve) => {
-				setTimeout(resolve, timeout);
-			});
-			return p;
-		});
-};

-// show a message on the status bar until the promise is resolved
-async function showPendingStatusBar(message: string, operation: Promise<any>): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Window,
-			title: message,
-		}, () => operation ).then((aok) => {}, (err) => {});
-	// we already resolve the operation elsewhere
-}

 export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');

-	let config = vscode.workspace.getConfiguration("dumbpilot");
+	const config = vscode.workspace.getConfiguration("dumbpilot");
+	var completion_enabled: boolean = config.get("completionEnabled") as boolean;

-	// handle completion changes
-	context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
-		config = vscode.workspace.getConfiguration("dumbpilot");
-	}));

 	// TODO: work with local configurations
 	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
+		completion_enabled = true;
 		config.update("completionEnabled", true);
 	});
 	context.subscriptions.push(disposable);

 	disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
+		completion_enabled = false;
 		config.update("completionEnabled", false);
 	});
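The removed `showMessageWithTimeout` hinges on the `withProgress` behavior its inline comment calls magic: the notification stays up exactly until the promise returned by the task callback settles, so resolving it from a `setTimeout` yields a self-dismissing toast. The same idea, minus the unused cancellation wiring:

```ts
import * as vscode from 'vscode';

// Show a notification that dismisses itself after `timeout` ms.
// withProgress keeps the notification visible until the returned promise resolves.
function showMessageWithTimeout(message: string, timeout: number): void {
	void vscode.window.withProgress(
		{ location: vscode.ProgressLocation.Notification, title: message, cancellable: false },
		() => new Promise<void>((resolve) => setTimeout(resolve, timeout))
	);
}
```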
@@ -127,12 +119,11 @@ export function activate(context: vscode.ExtensionContext) {
 	const provider: vscode.InlineCompletionItemProvider = {
 		async provideInlineCompletionItems(document, position, context, token) {

-			// disable if predictive completion is disabled
-			if (config.get("completionEnabled") as boolean === false) {
+			if (completion_enabled === false) {
 				return null;
 			}

-			// Since for every completion we will query the server, we want to filter out
+			// Since for every completion we want to query the server, we want to filter out
 			// automatic completion invokes
 			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
 				return null;
@@ -153,8 +144,8 @@ export function activate(context: vscode.ExtensionContext) {
 			// Get the document's text and position to send to the model
 			const doc_text = document.getText();
 			const doc_off = document.offsetAt(position);
-			let doc_before = doc_text.substring(0, doc_off);
-			let doc_after = doc_text.substring(doc_off);
+			var doc_before = doc_text.substring(0, doc_off);
+			var doc_after = doc_text.substring(doc_off);

 			// make it cleaner in hope to reduce the number of tokens
 			doc_before = clean_text(doc_before);
@@ -163,48 +154,37 @@ export function activate(context: vscode.ExtensionContext) {
 			// TODO: prune text up to a maximum context length

 			// Prefix the filename in a comment
-			let pfx: string, sfx: string;
-			const fname = document.fileName.split('/').at(-1);
+			var pfx: string, sfx: string;
 			const lang = document.languageId;
 			const prefixes = commentPrefix;
 			pfx = (prefixes as any)[lang][0] as string;
 			sfx = (prefixes as any)[lang][1] as string;
 			// FIXME: is there a more efficient way?
-			doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
+			doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;

-			const fim = config.get("fimEnabled") as boolean;
-			let req_str: string;
-			let request: llamaRequest = {
-				n_predict: config.get("llamaMaxtokens") as number,
-				mirostat: config.get("llamaMirostat") as number,
-				repeat_penalty: config.get("llamaRepeatPenalty") as number,
-				frequency_penalty: config.get("llamaFrequencyPenalty") as number,
-				presence_penalty: config.get("llamaPresencePenalty") as number,
-				repeat_last_n: config.get("llamaRepeatCtx") as number,
-				temperature: config.get("llamaTemperature") as number,
-				top_p: config.get("llamaTop_p") as number,
-				top_k: config.get("llamaTop_k") as number,
-				typical_p: config.get("llamaTypical_p") as number,
-				tfs_z: config.get("llamaTailfree_z") as number,
-				seed: config.get("llamaSeed") as number,
+			// server request object
+			const request: llamaCompletionRequest = {
+				n_predict: llama_maxtokens,
+				mirostat: llama_mirostat,
+				repeat_penalty: llama_repeat_penalty,
+				frequency_penalty: llama_frequency_penalty,
+				presence_penalty: llama_presence_penalty,
+				repeat_last_n: llama_repeat_ctx,
+				temperature: llama_temperature,
+				top_p: llama_top_p,
+				top_k: llama_top_k,
+				typical_p: llama_typical_p,
+				tfs_z: llama_tailfree_z,
+				seed: llama_session_seed,
 				stream: false,
-				cache_prompt: config.get("llamaCachePrompt") as boolean
+				prompt: doc_before,
 			};

-			if (fim === true) {
-				req_str = '/infill';
-				request.input_prefix = doc_before;
-				request.input_suffix = doc_after;
-			} else {
-				req_str = '/completion';
-				request.prompt = doc_before;
-			}
-
-			let data: llamaData;
+			var data: llamaData;

 			// try to send the request to the running server
 			try {
-				const response_promise = fetch(
-					(config.get("llamaHost") as string).concat(req_str),
+				const response = await fetch(
+					llama_host.concat('/completion'),
 					{
 						method: 'POST',
 						headers: {
@@ -213,25 +193,14 @@ export function activate(context: vscode.ExtensionContext) {
 						body: JSON.stringify(request)
 					}
 				);
-				showPendingStatusBar("dumbpilot waiting", response_promise);
-				const response = await response_promise;

 				if (response.ok === false) {
 					throw new Error("llama server request is not ok??");
 				}
 				data = await response.json() as llamaData;

-				const gen_tokens = data.timings.predicted_n;
-				const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
-				showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500);
-
 			} catch (e: any) {
-				const err = e as TypeError;
-				const cause: fetchErrorCause = err.cause as fetchErrorCause;
-				const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
-				// let the user know something went wrong
-				// TODO: maybe add a retry action or something
-				showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
+				console.log('dumbpilot: ' + e.message);
 				return null;
 			};
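The removed catch block relies on Node's fetch rejecting with a `TypeError` whose `cause` carries the underlying socket error (`code`, `address`, `port`), which is what the deleted `fetchErrorCause` type described. A defensive sketch of the same extraction; the optional-chaining guard is an addition, since `cause` is absent for non-network failures:

```ts
// Build a readable message from a failed fetch(), tolerating a missing cause.
function describeFetchError(e: unknown): string {
	const err = e as TypeError & { cause?: { code?: string; address?: string; port?: number } };
	if (err.cause?.code !== undefined) {
		return `${err.message} ${err.cause.code} at ${err.cause.address}:${err.cause.port}`;
	}
	return err instanceof Error ? err.message : String(e);
}
```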