You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
dumbpilot/src/extension.ts

216 lines
5.9 KiB

12 months ago
import { ok } from 'assert';
import * as vscode from 'vscode';
12 months ago
import commentPrefix from './comments.json';
12 months ago
// llama.cpp server response format
type llamaData = {
12 months ago
content: string,
generation_settings: JSON,
model: string,
prompt: string,
stopped_eos: boolean,
stopped_limit: boolean,
stopped_word: boolean,
stopping_word: string,
timings: {
predicted_ms: number,
predicted_n: number,
predicted_per_second: number,
predicted_per_token_ms: number,
prompt_ms: number,
prompt_n: number,
prompt_per_second: number,
prompt_per_token_ms: number
},
tokens_cached: number,
tokens_evaluated: number,
tokens_predicted: number,
truncated: boolean
};
/**
 * Body of a POST /completion request to the llama.cpp server:
 * sampling parameters plus the raw prompt text.
 */
type llamaCompletionRequest = {
	n_predict: number;
	mirostat: number;
	repeat_penalty: number;
	frequency_penalty: number;
	presence_penalty: number;
	repeat_last_n: number;
	temperature: number;
	top_p: number;
	top_k: number;
	typical_p: number;
	tfs_z: number;
	seed: number;
	stream: boolean;
	prompt: string;
};
type llamaFillRequest = {
n_predict: number,
mirostat: number,
repeat_penalty: number,
frequency_penalty: number,
presence_penalty: number,
repeat_last_n: number,
temperature: number,
top_p: number,
top_k: number,
typical_p: number,
tfs_z: number,
seed: number,
stream: boolean,
input_prefix: string,
input_suffix: string,
12 months ago
};
// --- llama.cpp generation defaults (hard-coded, not read from settings) ---
// Model context size; currently unused — see the "prune text" TODO in the
// completion provider below.
const llama_ctxsize = 2048;
// Token budget per request; -1 presumably means "no limit" — TODO confirm
// against the llama.cpp server docs.
const llama_maxtokens = -1;
// Mirostat sampling mode; 0 presumably disables it — TODO confirm.
const llama_mirostat = 0;
// Penalty applied to recently repeated tokens.
const llama_repeat_penalty = 1.11;
const llama_frequency_penalty = 0.0;
const llama_presence_penalty = 0.0;
// Window (in tokens) that the repeat penalty looks back over.
const llama_repeat_ctx = 256;
// Low temperature biases toward deterministic completions.
const llama_temperature = 0.25;
// Nucleus (top-p) sampling cutoff.
const llama_top_p = 0.95;
const llama_top_k = 40;
// Locally-typical sampling parameter.
const llama_typical_p = 0.95;
// Tail-free sampling z parameter.
const llama_tailfree_z = 0.5;
// -1 presumably asks the server to pick a random seed — TODO confirm.
const llama_session_seed = -1;
// NOTE(review): 0.0.0.0 is a bind-all address, not a client connect target;
// connecting to it works on some platforms but 127.0.0.1 is the conventional
// loopback default — confirm the intended host.
const llama_host = "http://0.0.0.0:8080";
// Normalize document text before sending it to the model.
// Every transformation is currently disabled, so this is an identity
// function: newline/tab escaping is already handled by JSON.stringify(),
// and collapsing runs of whitespace was left out because it might penalize
// results (most people indent with spaces). The hook is kept so cleanup
// rules can be reintroduced in one place.
function clean_text(txt: string): string {
	return txt;
}
export function activate(context: vscode.ExtensionContext) {
12 months ago
console.log('dumbpilot is now active');
const config = vscode.workspace.getConfiguration("dumbpilot");
var completion_enabled: boolean = config.get("completionEnabled") as boolean;
// TODO: work with local configurations
let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
completion_enabled = true;
config.update("completionEnabled", true);
});
context.subscriptions.push(disposable);
disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
completion_enabled = false;
config.update("completionEnabled", false);
});
12 months ago
// Register a new provider of inline completions, this does not decide how it is invoked
// only what the completion should be
// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
const provider: vscode.InlineCompletionItemProvider = {
async provideInlineCompletionItems(document, position, context, token) {
if (completion_enabled === false) {
return null;
}
12 months ago
// Since for every completion we want to query the server, we want to filter out
// automatic completion invokes
12 months ago
if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
return null;
}
// FIXME: I don't know if this works
token.onCancellationRequested(() => {
console.log("dumbpilot: operation cancelled, may still be running on the server");
return null;
});
//console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);
const result: vscode.InlineCompletionList = {
items: []
};
// Get the document's text and position to send to the model
const doc_text = document.getText();
const doc_off = document.offsetAt(position);
var doc_before = doc_text.substring(0, doc_off);
var doc_after = doc_text.substring(doc_off);
// make it cleaner in hope to reduce the number of tokens
doc_before = clean_text(doc_before);
doc_after = clean_text(doc_after);
// TODO: prune text up to a maximum context length
12 months ago
// Prefix the filename in a comment
var pfx: string, sfx: string;
12 months ago
const lang = document.languageId;
const prefixes = commentPrefix;
pfx = (prefixes as any)[lang][0] as string;
sfx = (prefixes as any)[lang][1] as string;
// FIXME: is there a more efficient way?
12 months ago
doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
12 months ago
// server request object
const request: llamaCompletionRequest = {
12 months ago
n_predict: llama_maxtokens,
mirostat: llama_mirostat,
repeat_penalty: llama_repeat_penalty,
frequency_penalty: llama_frequency_penalty,
presence_penalty: llama_presence_penalty,
repeat_last_n: llama_repeat_ctx,
temperature: llama_temperature,
top_p: llama_top_p,
top_k: llama_top_k,
typical_p: llama_typical_p,
tfs_z: llama_tailfree_z,
seed: llama_session_seed,
stream: false,
12 months ago
prompt: doc_before,
};
var data: llamaData;
12 months ago
// try to send the request to the running server
try {
const response = await fetch(
llama_host.concat('/completion'),
{
method: 'POST',
headers: {
'content-type': 'application/json; charset=UTF-8'
},
body: JSON.stringify(request)
}
);
if (response.ok === false) {
throw new Error("llama server request is not ok??");
}
data = await response.json() as llamaData;
12 months ago
} catch (e: any) {
console.log('dumbpilot: ' + e.message);
return null;
};
result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
return result;
},
};
vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider);
}
// This method is called when your extension is deactivated; nothing to do
// here beyond the disposables already tracked in context.subscriptions.
export function deactivate() {}