diff --git a/package.json b/package.json
index 9a55581..153bb65 100644
--- a/package.json
+++ b/package.json
@@ -2,6 +2,7 @@
   "name": "dumbpilot",
   "displayName": "dumbpilot",
   "description": "Simple code prediction using llama.cpp server api",
+  "publisher": "Alessandro Mauri",
   "version": "0.0.1",
   "engines": {
     "vscode": "^1.84.0"
@@ -102,10 +103,23 @@
   ],
   "main": "./out/extension.js",
   "contributes": {
-    "commands": [],
+    "commands": [
+      {
+        "command": "dumbpilot.disableCompletion",
+        "title": "Disable predictive code completion",
+        "category": "dumbpilot"
+      },
+      {
+        "command": "dumbpilot.enableCompletion",
+        "title": "Enable predictive code completion",
+        "category": "dumbpilot"
+      }
+    ],
     "keybindings": [
       {
         "key": "ctrl+shift+l",
+        "mac": "cmd+shift+l",
+        "when": "editorTextFocus",
         "command": "editor.action.inlineSuggest.trigger"
       }
     ]
diff --git a/src/extension.ts b/src/extension.ts
index 65f9739..de09aeb 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -4,7 +4,7 @@
 import commentPrefix from './comments.json';
 
 // llama.cpp server response format
-type llama_data = {
+type llamaData = {
 	content: string,
 	generation_settings: JSON,
 	model: string,
@@ -27,7 +27,41 @@
 	tokens_evaluated: number,
 	tokens_predicted: number,
 	truncated: boolean
-
+};
+
+type llamaCompletionRequest = {
+	n_predict: number,
+	mirostat: number,
+	repeat_penalty: number,
+	frequency_penalty: number,
+	presence_penalty: number,
+	repeat_last_n: number,
+	temperature: number,
+	top_p: number,
+	top_k: number,
+	typical_p: number,
+	tfs_z: number,
+	seed: number,
+	stream: boolean,
+	prompt: string,
+};
+
+type llamaFillRequest = {
+	n_predict: number,
+	mirostat: number,
+	repeat_penalty: number,
+	frequency_penalty: number,
+	presence_penalty: number,
+	repeat_last_n: number,
+	temperature: number,
+	top_p: number,
+	top_k: number,
+	typical_p: number,
+	tfs_z: number,
+	seed: number,
+	stream: boolean,
+	input_prefix: string,
+	input_suffix: string,
 };
 
 
@@ -48,18 +82,50 @@
 const llama_session_seed = -1;
 
 const llama_host = "http://0.0.0.0:8080";
 
-export function activate(cotext: vscode.ExtensionContext) {
+// clean up the document
+function clean_text(txt: string): string {
+	// these are already done by JSON.stringify()
+	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
+	//txt = txt.replace(/\t/gm, "\\t");
+
+	// FIXME: I don't know if this penalizes some results since most people indent with spaces
+	//txt = txt.replace(/\s+/gm, " ");
+	return txt;
+}
+
+
+export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
 
+	const config = vscode.workspace.getConfiguration("dumbpilot");
+	var completion_enabled: boolean = config.get("completionEnabled") as boolean;
+
+	// TODO: work with local configurations
+	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
+		completion_enabled = true;
+		config.update("completionEnabled", true);
+	});
+	context.subscriptions.push(disposable);
+
+	disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
+		completion_enabled = false;
+		config.update("completionEnabled", false);
+	});
+	context.subscriptions.push(disposable);
+
 	// Register a new provider of inline completions, this does not decide how it is invoked
 	// only what the completion should be
 	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
 	const provider: vscode.InlineCompletionItemProvider = {
 		async provideInlineCompletionItems(document, position, context, token) {
 
+			if (completion_enabled === false) {
+				return null;
+			}
+
 			// Since for every completion we want to query the server, we want to filter out
-			// accidental automatic completion invokes
+			// automatic completion invocations
 			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
 				return null;
 			}
@@ -82,27 +148,23 @@ export function activate(cotext: vscode.ExtensionContext) {
 			var doc_before = doc_text.substring(0, doc_off);
 			var doc_after = doc_text.substring(doc_off);
 
-			// clean up the document, stuff commented is already done by JSON.stringify()
-			//doc_before = doc_before.replace(/(\r\n|\n|\r)/gm, "\\n");
-			//doc_before = doc_before.replace(/\t/gm, "\\t");
-			//doc_after = doc_before.replace(/(\r\n|\n|\r)/gm, "\\n");
-			//doc_after = doc_before.replace(/\t/gm, "\\t");
-
-			// FIXME: I don't know if this penalizes some results since most people indent with spaces
-			//doc_before = doc_before.replace(/\s+/gm, " ");
-			//doc_after = doc_before.replace(/\s+/gm, " ");
+			// clean up the text in the hope of reducing the token count
+			doc_before = clean_text(doc_before);
+			doc_after = clean_text(doc_after);
+
+			// TODO: prune the text up to a maximum context length
 
-			// prefix commented filename, is this the best way?
-			var pfx: string = '';
-			var sfx: string = '';
+			// Prefix the filename in a comment
+			var pfx: string, sfx: string;
 			const lang = document.languageId;
 			const prefixes = commentPrefix;
 			pfx = (prefixes as any)[lang][0] as string;
 			sfx = (prefixes as any)[lang][1] as string;
+			// FIXME: is there a more efficient way?
 			doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
 
 			// server request object
-			const request = {
+			const request: llamaCompletionRequest = {
 				n_predict: llama_maxtokens,
 				mirostat: llama_mirostat,
 				repeat_penalty: llama_repeat_penalty,
@@ -115,12 +177,11 @@
 				typical_p: llama_typical_p,
 				tfs_z: llama_tailfree_z,
 				seed: llama_session_seed,
-//				input_prefix: doc_before,
-//				input_suffix: doc_after,
+				stream: false,
 				prompt: doc_before,
 			};
 
-			var data: llama_data;
+			var data: llamaData;
 			// try to send the request to the running server
 			try {
 				const response = await fetch(
@@ -137,7 +198,7 @@
 					throw new Error("llama server request is not ok??");
 				}
 
-				data = await response.json() as llama_data;
+				data = await response.json() as llamaData;
 			} catch (e: any) {
 				console.log('dumbpilot: ' + e.message);
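
Note: activate() reads and updates a `dumbpilot.completionEnabled` setting, but the package.json hunk above does not declare it, and VS Code rejects `config.update()` calls for settings that are not registered. The extension would plausibly also need a `contributes.configuration` block along these lines (a sketch, not part of this commit; the description string is invented):

```jsonc
"configuration": {
  "title": "dumbpilot",
  "properties": {
    "dumbpilot.completionEnabled": {
      "type": "boolean",
      "default": true,
      "description": "Enable or disable predictive code completion"
    }
  }
}
```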
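The `llamaFillRequest` type added above is defined but never used, and the removed `input_prefix`/`input_suffix` lines suggest a planned fill-in-the-middle mode. A minimal sketch of how it could drive the llama.cpp server's /infill endpoint, which takes a prefix and suffix instead of a single prompt; the `fetchInfill` name is hypothetical, and the constants not visible in the hunks above (frequency penalty through top_k) are assumed to follow the same `llama_*` naming pattern as the rest of extension.ts:

```typescript
// Hypothetical helper, not part of this diff: ask the llama.cpp server to
// fill in the text between doc_before and doc_after via POST /infill.
async function fetchInfill(doc_before: string, doc_after: string): Promise<string> {
	const request: llamaFillRequest = {
		n_predict: llama_maxtokens,
		mirostat: llama_mirostat,
		repeat_penalty: llama_repeat_penalty,
		frequency_penalty: llama_frequency_penalty,  // assumed constant name
		presence_penalty: llama_presence_penalty,    // assumed constant name
		repeat_last_n: llama_repeat_last_n,          // assumed constant name
		temperature: llama_temperature,              // assumed constant name
		top_p: llama_top_p,                          // assumed constant name
		top_k: llama_top_k,                          // assumed constant name
		typical_p: llama_typical_p,
		tfs_z: llama_tailfree_z,
		seed: llama_session_seed,
		stream: false,
		input_prefix: doc_before,  // text before the cursor
		input_suffix: doc_after,   // text after the cursor
	};
	const response = await fetch(llama_host + '/infill', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(request),
	});
	if (!response.ok) {
		throw new Error("llama server request is not ok??");
	}
	const data = await response.json() as llamaData;
	return data.content;
}
```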