diff --git a/.vscode/launch.json b/.vscode/launch.json
index 670d6e6..b6e01e8 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -23,7 +23,7 @@
 			"request": "launch",
 			"args": [
 				"--extensionDevelopmentPath=${workspaceFolder}",
-				"--extensionTestsPath=${workspaceFolder}/out/test/suite/index"
+				"--extensionTestsPath=${workspaceFolder}/out/test/"
 			],
 			"outFiles": [
 				"${workspaceFolder}/out/test/**/*.js"
diff --git a/TODO.md b/TODO.md
index 8025726..74f49dd 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,6 +1,6 @@
 [x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
-[] - add config option to disable the extension
+[x] - add config option to disable the extension
 [] - add command to test and query connection to server
-[] - add feedback when waiting response
\ No newline at end of file
+[x] - add feedback when waiting response
\ No newline at end of file
diff --git a/package.json b/package.json
index 153bb65..d7d2e9c 100644
--- a/package.json
+++ b/package.json
@@ -122,7 +122,40 @@
 				"when": "editorTextFocus",
 				"command": "editor.action.inlineSuggest.trigger"
 			}
-		]
+		],
+		"configuration": {
+			"title": "dumbpilot",
+			"properties": {
+				"dumbpilot.completionEnabled": {
+					"type": "boolean",
+					"default": true,
+					"description": "Enable predictive code completion"
+				},
+				"dumbpilot.fimEnabled": {
+					"type": "boolean",
+					"default": false,
+					"description": "Enable Fill-in-the-Middle mode; when disabled, only the context up to the cursor is used"
+				},
+				"dumbpilot.llamaHost": {
+					"type": "string",
+					"default": "http://0.0.0.0:8080",
+					"description": "llama.cpp server address"
+				},
+				"dumbpilot.llamaCtxsize": {"type": "number", "default": 2048},
+				"dumbpilot.llamaMaxtokens": {"type": "number", "default": -1},
+				"dumbpilot.llamaMirostat": {"type": "number", "default": 0},
+				"dumbpilot.llamaRepeatPenalty": {"type": "number", "default": 1.11},
+				"dumbpilot.llamaFrequencyPenalty": {"type": "number", "default": 0.0},
+				"dumbpilot.llamaPresencePenalty": {"type": "number", "default": 0.0},
+				"dumbpilot.llamaRepeatCtx": {"type": "number", "default": 256},
+				"dumbpilot.llamaTemperature": {"type": "number", "default": 0.25},
+				"dumbpilot.llamaTop_p": {"type": "number", "default": 0.95},
+				"dumbpilot.llamaTop_k": {"type": "number", "default": 40},
+				"dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
+				"dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
+				"dumbpilot.llamaSeed": {"type": "number", "default": -1}
+			}
+		}
 	},
 	"scripts": {
 		"vscode:prepublish": "npm run compile",
diff --git a/src/extension.ts b/src/extension.ts
index de09aeb..f8f9dd2 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -61,25 +61,16 @@
 type llamaFillRequest = {
 	seed: number,
 	stream: boolean,
 	input_prefix: string,
-	input_suffix: string,
+	input_suffix: string
 };
-
-const llama_ctxsize = 2048;
-const llama_maxtokens = -1;
-const llama_mirostat = 0;
-const llama_repeat_penalty = 1.11;
-const llama_frequency_penalty = 0.0;
-const llama_presence_penalty = 0.0;
-const llama_repeat_ctx = 256;
-const llama_temperature = 0.25;
-const llama_top_p = 0.95;
-const llama_top_k = 40;
-const llama_typical_p = 0.95;
-const llama_tailfree_z = 0.5;
-const llama_session_seed = -1;
-
-const llama_host = "http://0.0.0.0:8080";
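+// shape of the `cause` object that Node's fetch attaches to network errors
+// (assumption: observed on undici connection failures such as ECONNREFUSED;
+// this is not a stable API, so the fields may be absent for other errors)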
+type fetchErrorCause = {
+	errno: number,
+	code: string,
+	syscall: string,
+	address: string,
+	port: number
+};
 
 
 // clean up the document
@@ -94,12 +85,41 @@ function clean_text(txt: string): string {
 }
 
 
+// Show a message notification with a set timeout
+async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
+	void vscode.window.withProgress(
+		{
+			location: vscode.ProgressLocation.Notification,
+			title: message,
+			cancellable: false,
+		}, (progress, token) => {
+			token.onCancellationRequested(() => {});
+
+			// the notification is dismissed once this promise resolves, i.e. after `timeout` ms
+			const p = new Promise<void>((resolve) => {
+				setTimeout(resolve, timeout);
+			});
+			return p;
+		});
+}
+
+
+// show a message on the status bar until the promise is resolved
+async function showPendingStatusBar(message: string, resolve: Promise<any>): Promise<void> {
+	void vscode.window.withProgress(
+		{
+			location: vscode.ProgressLocation.Window,
+			title: message,
+		}, () => { return resolve; });
+}
+
+
 export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
 
 	const config = vscode.workspace.getConfiguration("dumbpilot");
-	var completion_enabled: boolean = config.get("completionEnabled") as boolean;
+	let completion_enabled: boolean = config.get("completionEnabled") as boolean;
 
 	// TODO: work with local configurations
 	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
@@ -144,8 +164,8 @@
 		// Get the document's text and position to send to the model
 		const doc_text = document.getText();
 		const doc_off = document.offsetAt(position);
-		var doc_before = doc_text.substring(0, doc_off);
-		var doc_after = doc_text.substring(doc_off);
+		let doc_before = doc_text.substring(0, doc_off);
+		let doc_after = doc_text.substring(doc_off);
 
 		// make it cleaner in hope to reduce the number of tokens
 		doc_before = clean_text(doc_before);
@@ -154,7 +174,7 @@
 		// TODO: prune text up to a maximum context length
 
 		// Prefix the filename in a comment
-		var pfx: string, sfx: string;
+		let pfx: string, sfx: string;
 		const lang = document.languageId;
 		const prefixes = commentPrefix;
 		pfx = (prefixes as any)[lang][0] as string;
@@ -164,27 +184,27 @@
 
 		// server request object
 		const request: llamaCompletionRequest = {
-			n_predict: llama_maxtokens,
-			mirostat: llama_mirostat,
-			repeat_penalty: llama_repeat_penalty,
-			frequency_penalty: llama_frequency_penalty,
-			presence_penalty: llama_presence_penalty,
-			repeat_last_n: llama_repeat_ctx,
-			temperature: llama_temperature,
-			top_p: llama_top_p,
-			top_k: llama_top_k,
-			typical_p: llama_typical_p,
-			tfs_z: llama_tailfree_z,
-			seed: llama_session_seed,
+			n_predict: config.get("llamaMaxtokens") as number,
+			mirostat: config.get("llamaMirostat") as number,
+			repeat_penalty: config.get("llamaRepeatPenalty") as number,
+			frequency_penalty: config.get("llamaFrequencyPenalty") as number,
+			presence_penalty: config.get("llamaPresencePenalty") as number,
+			repeat_last_n: config.get("llamaRepeatCtx") as number,
+			temperature: config.get("llamaTemperature") as number,
+			top_p: config.get("llamaTop_p") as number,
+			top_k: config.get("llamaTop_k") as number,
+			typical_p: config.get("llamaTypical_p") as number,
+			tfs_z: config.get("llamaTailfree_z") as number,
+			seed: config.get("llamaSeed") as number,
 			stream: false,
 			prompt: doc_before,
 		};
 
-		var data: llamaData;
+		let data: llamaData;
 		// try to send the request to the running server
 		try {
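+			// note: the fetch promise is intentionally not awaited right away,
+			// so the pending status bar message below can show while the server works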
-			const response = await fetch(
-				llama_host.concat('/completion'),
+			const response_promise = fetch(
+				(config.get("llamaHost") as string).concat('/completion'),
 				{
 					method: 'POST',
 					headers: {
@@ -193,14 +213,23 @@
 					body: JSON.stringify(request)
 				}
 			);
+
+			showPendingStatusBar("dumbpilot waiting", response_promise);
+			const response = await response_promise;
 
 			if (response.ok === false) {
 				throw new Error("llama server request is not ok??");
 			}
 
 			data = await response.json() as llamaData;
+			console.log(JSON.stringify(data));
 
 		} catch (e: any) {
-			console.log('dumbpilot: ' + e.message);
+			// surface the failure to the user; `e.cause` is Node-specific and
+			// may be undefined for errors other than connection failures
+			const err = e as TypeError;
+			const cause: fetchErrorCause = err.cause as fetchErrorCause;
+			const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
+			// let the user know something went wrong
+			// TODO: maybe add a retry action or something
+			showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
 			return null;
 		};