import { ok } from 'assert';
import * as vscode from 'vscode';
import commentPrefix from './comments.json';

// llama.cpp server response format
type llamaData = {
	content: string,
	generation_settings: JSON,
	model: string,
	prompt: string,
	stopped_eos: boolean,
	stopped_limit: boolean,
	stopped_word: boolean,
	stopping_word: string,
	timings: {
		predicted_ms: number,
		predicted_n: number,
		predicted_per_second: number,
		predicted_per_token_ms: number,
		prompt_ms: number,
		prompt_n: number,
		prompt_per_second: number,
		prompt_per_token_ms: number
	},
	tokens_cached: number,
	tokens_evaluated: number,
	tokens_predicted: number,
	truncated: boolean
};

// Sampling parameters shared by both request shapes below
type llamaSamplingParams = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
};

// Request body carrying a plain prompt
type llamaCompletionRequest = llamaSamplingParams & {
	prompt: string,
};

// Request body carrying the text before and after the cursor
type llamaFillRequest = llamaSamplingParams & {
	input_prefix: string,
	input_suffix: string
};

// Shape of the `cause` attached to the error when the request fails
type fetchErrorCause = {
	errno: number,
	code: string,
	syscall: string,
	address: string,
	port: number
};

/**
 * Clean up the document text before it is sent to the model.
 *
 * Currently returns the text unchanged: newline/tab escaping is already done
 * by JSON.stringify(), and whitespace collapsing is disabled (see FIXME).
 *
 * @param txt text to clean
 * @returns the (currently unmodified) text
 */
function clean_text(txt: string): string {
	// these are already done by JSON.stringify()
	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
	//txt = txt.replace((/\t/gm, "\\t"));

	// FIXME: I don't know if this penalizes some results since most people indent with spaces
	//txt = txt.replace(/\s+/gm, " ");
	return txt;
}

/**
 * Show a message notification with a set timeout.
 *
 * The progress task handed to VS Code resolves after `timeout` milliseconds.
 * The function itself returns immediately; it does not wait for the timeout.
 *
 * @param message text used as the notification title
 * @param timeout delay in milliseconds before the progress task resolves
 */
async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
	void vscode.window.withProgress(
		{
			location: vscode.ProgressLocation.Notification,
			title: message,
			cancellable: false,
		},
		() => new Promise<void>((done) => {
			setTimeout(done, timeout);
		})
	);
}

/**
 * Show a message on the status bar until the promise is settled.
 *
 * @param message text used as the progress title
 * @param resolve promise whose settlement ends the progress task
 */
async function showPendingStatusBar(message: string, resolve: Promise<unknown>): Promise<void> {
	void vscode.window.withProgress(
		{
			location: vscode.ProgressLocation.Window,
			title: message,
		},
		// A rejection is mapped to a normal completion here so that it is not
		// reported a second time through this fire-and-forget call; whoever
		// owns `resolve` still observes the rejection on the original promise.
		() => resolve.then(() => undefined, () => undefined)
	);
}

/**
 * Build the human readable text for a failed request.
 *
 * When the error carries a `cause` with the fields of `fetchErrorCause`, they
 * are appended to the message; errors without one (for example the Error
 * thrown for a non-ok response) yield just their message.
 *
 * @param e value caught from the request
 * @returns description suitable for a notification
 */
function describeFetchError(e: unknown): string {
	if (!(e instanceof Error)) {
		return String(e);
	}
	const cause = (e as Error & { cause?: Partial<fetchErrorCause> }).cause;
	let text = e.message;
	if (cause?.code !== undefined) {
		text += ' ' + cause.code;
	}
	if (cause?.address !== undefined) {
		text += ' at ' + cause.address;
		if (cause.port !== undefined) {
			text += ':' + cause.port;
		}
	}
	return text;
}

/**
 * Extension entry point: registers the enable/disable commands and the inline
 * completion provider that queries the llama server.
 *
 * @param context extension context; everything registered here is pushed to
 *                its subscriptions
 */
export function activate(context: vscode.ExtensionContext) {
	console.log('dumbpilot is now active');

	const config = vscode.workspace.getConfiguration("dumbpilot");
	let completion_enabled: boolean = config.get("completionEnabled") as boolean;

	// Read a numeric setting; the value is whatever the configuration holds
	const numberSetting = (key: string): number => config.get(key) as number;

	// TODO: work with local configurations
	context.subscriptions.push(
		vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
			completion_enabled = true;
			void config.update("completionEnabled", true);
		}),
		vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
			completion_enabled = false;
			void config.update("completionEnabled", false);
		})
	);

	// Register a new provider of inline completions, this does not decide how it is invoked
	// only what the completion should be
	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
	const provider: vscode.InlineCompletionItemProvider = {
		async provideInlineCompletionItems(document, position, context, token) {
			if (completion_enabled === false) {
				return null;
			}

			// Since for every completion we want to query the server, we want to filter out
			// automatic completion invokes
			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
				return null;
			}

			// Nothing to do when the request was cancelled before it started
			if (token.isCancellationRequested) {
				return null;
			}

			// Abort the HTTP request when the editor cancels the completion
			const abort = new AbortController();
			const cancel_listener = token.onCancellationRequested(() => {
				console.log("dumbpilot: operation cancelled, may still be running on the server");
				abort.abort();
			});

			//console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);

			// Get the document's text and position to send to the model
			const doc_text = document.getText();
			const doc_off = document.offsetAt(position);
			// make it cleaner in hope to reduce the number of tokens
			let doc_before = clean_text(doc_text.substring(0, doc_off));

			// TODO: prune text up to a maximum context length

			// Prefix the filename in a comment, when the comment delimiters of
			// the language are known. The shape of comments.json is not visible
			// from this file, hence the cast; entries are indexed as
			// [prefix, suffix] like the lookup always did.
			const delimiters = (commentPrefix as unknown as Record<string, string[] | undefined>)[document.languageId];
			if (delimiters !== undefined) {
				const [pfx = '', sfx = ''] = delimiters;
				// FIXME: is there a more efficient way?
				doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
			}

			// server request object
			const request: llamaCompletionRequest = {
				n_predict: numberSetting("llamaMaxtokens"),
				mirostat: numberSetting("llamaMirostat"),
				repeat_penalty: numberSetting("llamaRepeatPenalty"),
				frequency_penalty: numberSetting("llamaFrequencyPenalty"),
				presence_penalty: numberSetting("llamaPresencePenalty"),
				repeat_last_n: numberSetting("llamaRepeatCtx"),
				temperature: numberSetting("llamaTemperature"),
				top_p: numberSetting("llamaTop_p"),
				top_k: numberSetting("llamaTop_k"),
				typical_p: numberSetting("llamaTypical_p"),
				tfs_z: numberSetting("llamaTailfree_z"),
				seed: numberSetting("llamaSeed"),
				stream: false,
				prompt: doc_before,
			};

			let data: llamaData;
			// try to send the request to the running server
			try {
				const response_promise = fetch(
					(config.get("llamaHost") as string).concat('/completion'),
					{
						method: 'POST',
						headers: {
							'content-type': 'application/json; charset=UTF-8'
						},
						body: JSON.stringify(request),
						signal: abort.signal
					}
				);

				void showPendingStatusBar("dumbpilot waiting", response_promise);
				const response = await response_promise;
				if (response.ok === false) {
					throw new Error("llama server request is not ok??");
				}

				data = await response.json() as llamaData;
				console.log(JSON.stringify(data));
			} catch (e: unknown) {
				// A cancelled request is not an error worth a notification
				if (token.isCancellationRequested) {
					return null;
				}
				// let the user know something went wrong
				// TODO: maybe add a retry action or something
				void showMessageWithTimeout('dumbpilot error: ' + describeFetchError(e), 3000);
				return null;
			} finally {
				cancel_listener.dispose();
			}

			const result: vscode.InlineCompletionList = {
				items: [{ insertText: data.content, range: new vscode.Range(position, position) }]
			};
			return result;
		},
	};
	context.subscriptions.push(
		vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, provider)
	);
}

// This method is called when your extension is deactivated
export function deactivate() {}