import { ok } from 'assert';
import * as vscode from 'vscode';
import commentPrefix from './comments.json';

// llama.cpp server response format
type llamaData = {
	content: string,
	generation_settings: JSON,
	model: string,
	prompt: string,
	stopped_eos: boolean,
	stopped_limit: boolean,
	stopped_word: boolean,
	stopping_word: string,
	timings: {
		predicted_ms: number,
		predicted_n: number,
		predicted_per_second: number,
		predicted_per_token_ms: number,
		prompt_ms: number,
		prompt_n: number,
		prompt_per_second: number,
		prompt_per_token_ms: number
	},
	tokens_cached: number,
	tokens_evaluated: number,
	tokens_predicted: number,
	truncated: boolean
};

// Body of a POST to the server's /completion endpoint
type llamaCompletionRequest = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	prompt: string,
};

// Same sampling parameters as llamaCompletionRequest, but with a prefix/suffix
// pair instead of a single prompt. Not used by the code in this file yet.
type llamaFillRequest = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	input_prefix: string,
	input_suffix: string,
};

// Hard-coded request parameters sent with every completion request
const llama_ctxsize = 2048;
const llama_maxtokens = -1;
const llama_mirostat = 0;
const llama_repeat_penalty = 1.11;
const llama_frequency_penalty = 0.0;
const llama_presence_penalty = 0.0;
const llama_repeat_ctx = 256;
const llama_temperature = 0.25;
const llama_top_p = 0.95;
const llama_top_k = 40;
const llama_typical_p = 0.95;
const llama_tailfree_z = 0.5;
const llama_session_seed = -1;
const llama_host = "http://0.0.0.0:8080";

/**
 * Clean up the document text before it is sent to the model.
 *
 * Currently a pass-through: every transformation below is disabled, so the
 * input is returned unchanged.
 *
 * @param txt text taken from the document
 * @returns the text to embed in the request
 */
function clean_text(txt: string): string {
	// these are already done by JSON.stringify()
	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
	//txt = txt.replace((/\t/gm, "\\t"));

	// FIXME: I don't know if this penalizes some results since most people indent with spaces
	//txt = txt.replace(/\s+/gm, " ");
	return txt;
}

/** Turn a caught value of unknown type into a printable message. */
function error_message(e: unknown): string {
	return e instanceof Error ? e.message : String(e);
}

/**
 * Build the "<comment-prefix> <file name><comment-suffix>\n" header that is
 * put in front of the prompt.
 *
 * @param lang VS Code language id of the document
 * @param fileName file name of the document
 * @returns the header line including its trailing newline, or an empty string
 *          when comments.json has no usable entry for the language
 */
function filename_header(lang: string, fileName: string): string {
	const entry: unknown = (commentPrefix as Record<string, unknown>)[lang];
	// Unknown language ids (or inherited keys such as "constructor") must not
	// crash the provider: just skip the header.
	if (!Array.isArray(entry) || typeof entry[0] !== 'string') {
		return '';
	}
	const pfx: string = entry[0];
	const sfx: string = typeof entry[1] === 'string' ? entry[1] : '';
	return pfx + ' ' + fileName + sfx + '\n';
}

/**
 * Send a completion request to the llama.cpp server.
 *
 * @param request request body for the /completion endpoint
 * @param signal aborts the HTTP request when triggered
 * @returns the decoded server response
 * @throws Error when the request fails, is aborted, returns a non-2xx status,
 *         or the response carries no string `content`
 */
async function request_completion(request: llamaCompletionRequest, signal: AbortSignal): Promise<llamaData> {
	const response = await fetch(
		llama_host.concat('/completion'),
		{
			method: 'POST',
			headers: {
				'content-type': 'application/json; charset=UTF-8'
			},
			body: JSON.stringify(request),
			signal: signal
		}
	);

	if (response.ok === false) {
		throw new Error("llama server request is not ok??");
	}

	const data = await response.json() as llamaData;
	// The JSON is not schema-checked; at least make sure the one field we use exists.
	if (data === null || typeof data !== 'object' || typeof data.content !== 'string') {
		throw new Error("llama server response has no content");
	}
	return data;
}

/**
 * Extension entry point: registers the enable/disable commands and the inline
 * completion provider. Everything registered here is added to
 * `context.subscriptions` so it is disposed together with the extension.
 *
 * @param context extension context supplied by VS Code
 */
export function activate(context: vscode.ExtensionContext) {
	console.log('dumbpilot is now active');

	const config = vscode.workspace.getConfiguration("dumbpilot");
	// An unset setting counts as enabled, as before (only an explicit `false` disables).
	let completion_enabled: boolean = config.get<boolean>("completionEnabled", true);

	// Flip the in-memory flag and persist it to the settings.
	// TODO: work with local configurations
	const set_completion_enabled = (enabled: boolean): void => {
		completion_enabled = enabled;
		// update() is asynchronous and can reject; log instead of leaving the
		// rejection unhandled.
		config.update("completionEnabled", enabled).then(undefined, (err: unknown) => {
			console.log('dumbpilot: could not save completionEnabled: ' + error_message(err));
		});
	};

	context.subscriptions.push(
		vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
			set_completion_enabled(true);
		}),
		vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
			set_completion_enabled(false);
		})
	);

	// Register a new provider of inline completions, this does not decide how it is invoked
	// only what the completion should be
	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
	const provider: vscode.InlineCompletionItemProvider = {
		async provideInlineCompletionItems(document, position, context, token) {
			if (completion_enabled === false) {
				return null;
			}

			// Since for every completion we want to query the server, we want to filter out
			// automatic completion invokes
			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
				return null;
			}

			if (token.isCancellationRequested) {
				return null;
			}

			// Tie the HTTP request to the cancellation token so that a cancelled
			// completion drops the connection instead of waiting for the server.
			const abort = new AbortController();
			const cancel_listener = token.onCancellationRequested(() => {
				console.log("dumbpilot: operation cancelled, may still be running on the server");
				abort.abort();
			});

			//console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);

			// Get the document's text and position to send to the model
			const doc_text = document.getText();
			const doc_off = document.offsetAt(position);

			// make it cleaner in hope to reduce the number of tokens
			// (only the text before the cursor is sent; the /completion request has no suffix field)
			// TODO: prune text up to a maximum context length
			let doc_before = clean_text(doc_text.substring(0, doc_off));

			// Prefix the filename in a comment
			// FIXME: is there a more efficient way?
			doc_before = filename_header(document.languageId, document.fileName) + doc_before;

			// server request object
			const request: llamaCompletionRequest = {
				n_predict: llama_maxtokens,
				mirostat: llama_mirostat,
				repeat_penalty: llama_repeat_penalty,
				frequency_penalty: llama_frequency_penalty,
				presence_penalty: llama_presence_penalty,
				repeat_last_n: llama_repeat_ctx,
				temperature: llama_temperature,
				top_p: llama_top_p,
				top_k: llama_top_k,
				typical_p: llama_typical_p,
				tfs_z: llama_tailfree_z,
				seed: llama_session_seed,
				stream: false,
				prompt: doc_before,
			};

			let data: llamaData;
			// try to send the request to the running server
			try {
				data = await request_completion(request, abort.signal);
			} catch (e: unknown) {
				console.log('dumbpilot: ' + error_message(e));
				return null;
			} finally {
				// The listener is only needed while the request is in flight.
				cancel_listener.dispose();
			}

			// Do not hand back a result nobody is waiting for any more.
			if (token.isCancellationRequested) {
				return null;
			}

			const result: vscode.InlineCompletionList = {
				items: [{ insertText: data.content, range: new vscode.Range(position, position) }]
			};
			return result;
		},
	};

	context.subscriptions.push(
		vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider)
	);
}

// This method is called when your extension is deactivated
export function deactivate() {}