|
|
|
import { ok } from 'assert';
|
|
|
|
import * as vscode from 'vscode';
|
|
|
|
import commentPrefix from './comments.json';
|
|
|
|
|
|
|
|
|
|
|
|
// llama.cpp server response format
|
|
|
|
type llamaData = {
|
|
|
|
content: string,
|
|
|
|
generation_settings: JSON,
|
|
|
|
model: string,
|
|
|
|
prompt: string,
|
|
|
|
stopped_eos: boolean,
|
|
|
|
stopped_limit: boolean,
|
|
|
|
stopped_word: boolean,
|
|
|
|
stopping_word: string,
|
|
|
|
timings: {
|
|
|
|
predicted_ms: number,
|
|
|
|
predicted_n: number,
|
|
|
|
predicted_per_second: number,
|
|
|
|
predicted_per_token_ms: number,
|
|
|
|
prompt_ms: number,
|
|
|
|
prompt_n: number,
|
|
|
|
prompt_per_second: number,
|
|
|
|
prompt_per_token_ms: number
|
|
|
|
},
|
|
|
|
tokens_cached: number,
|
|
|
|
tokens_evaluated: number,
|
|
|
|
tokens_predicted: number,
|
|
|
|
truncated: boolean
|
|
|
|
};
|
|
|
|
|
|
|
|
// Request body for the llama.cpp server's /completion endpoint.
// Field names follow the server's JSON API, hence snake_case.
type llamaCompletionRequest = {
	// maximum number of tokens to generate
	n_predict: number,
	// mirostat sampling mode (0 disables it)
	mirostat: number,
	// sampling / penalty parameters forwarded verbatim to the server
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	// how many recent tokens the repeat penalty considers
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	// tail-free sampling parameter
	tfs_z: number,
	// RNG seed (-1 typically means random on the server side)
	seed: number,
	// whether the server should stream tokens back; this extension sends false
	stream: boolean,
	// the text to complete
	prompt: string,
};
|
|
|
|
|
|
|
|
// Request body for the llama.cpp server's fill-in-the-middle (infill)
// endpoint: same sampling fields as llamaCompletionRequest, but the
// context is given as a prefix/suffix pair instead of a single prompt.
// NOTE(review): not referenced anywhere in this file's visible code —
// presumably staged for a future infill feature; verify before removing.
type llamaFillRequest = {
	// maximum number of tokens to generate
	n_predict: number,
	// mirostat sampling mode (0 disables it)
	mirostat: number,
	// sampling / penalty parameters forwarded verbatim to the server
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	// text before the cursor
	input_prefix: string,
	// text after the cursor
	input_suffix: string
};
|
|
|
|
|
|
|
|
// Shape of the `cause` property attached to TypeErrors thrown by fetch()
// on network-level failures (e.g. connection refused). Used in activate()
// to build a human-readable error message.
type fetchErrorCause = {
	// numeric errno of the underlying system error
	errno: number,
	// error code string, e.g. "ECONNREFUSED"
	code: string,
	// name of the syscall that failed, e.g. "connect"
	syscall: string,
	// remote address the request was targeting
	address: string,
	// remote port the request was targeting
	port: number
};
|
|
|
|
|
|
|
|
|
|
|
|
// clean up the document
|
|
|
|
function clean_text(txt: string): string {
|
|
|
|
// these are already done by JSON.stringify()
|
|
|
|
//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
|
|
|
|
//txt = txt.replace((/\t/gm, "\\t"));
|
|
|
|
|
|
|
|
// FIXME: I don't know if this penalizes some results since most people indent with spaces
|
|
|
|
//txt = txt.replace(/\s+/gm, " ");
|
|
|
|
return txt;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Show a message notification with a set timeout
|
|
|
|
async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
|
|
|
|
void vscode.window.withProgress(
|
|
|
|
{
|
|
|
|
location: vscode.ProgressLocation.Notification,
|
|
|
|
title: message,
|
|
|
|
cancellable: false,
|
|
|
|
}, (progress, token) => {
|
|
|
|
token.onCancellationRequested(() => {});
|
|
|
|
|
|
|
|
// This is magic I don't understand
|
|
|
|
const p = new Promise<void>((resolve) => {
|
|
|
|
setTimeout(resolve, timeout);
|
|
|
|
});
|
|
|
|
return p;
|
|
|
|
});
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// show a message on the status bar until the promise is resolved
|
|
|
|
async function showPendingStatusBar(message: string, resolve: Promise<any>): Promise<void> {
|
|
|
|
void vscode.window.withProgress(
|
|
|
|
{
|
|
|
|
location: vscode.ProgressLocation.Window,
|
|
|
|
title: message,
|
|
|
|
}, () => { return resolve; });
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
export function activate(context: vscode.ExtensionContext) {
|
|
|
|
|
|
|
|
console.log('dumbpilot is now active');
|
|
|
|
|
|
|
|
const config = vscode.workspace.getConfiguration("dumbpilot");
|
|
|
|
let completion_enabled: boolean = config.get("completionEnabled") as boolean;
|
|
|
|
|
|
|
|
// TODO: work with local configurations
|
|
|
|
let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
|
|
|
|
completion_enabled = true;
|
|
|
|
config.update("completionEnabled", true);
|
|
|
|
});
|
|
|
|
context.subscriptions.push(disposable);
|
|
|
|
|
|
|
|
disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
|
|
|
|
completion_enabled = false;
|
|
|
|
config.update("completionEnabled", false);
|
|
|
|
});
|
|
|
|
|
|
|
|
// Register a new provider of inline completions, this does not decide how it is invoked
|
|
|
|
// only what the completion should be
|
|
|
|
// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
|
|
|
|
const provider: vscode.InlineCompletionItemProvider = {
|
|
|
|
async provideInlineCompletionItems(document, position, context, token) {
|
|
|
|
|
|
|
|
if (completion_enabled === false) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since for every completion we want to query the server, we want to filter out
|
|
|
|
// automatic completion invokes
|
|
|
|
if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: I don't know if this works
|
|
|
|
token.onCancellationRequested(() => {
|
|
|
|
console.log("dumbpilot: operation cancelled, may still be running on the server");
|
|
|
|
return null;
|
|
|
|
});
|
|
|
|
|
|
|
|
//console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);
|
|
|
|
|
|
|
|
const result: vscode.InlineCompletionList = {
|
|
|
|
items: []
|
|
|
|
};
|
|
|
|
|
|
|
|
// Get the document's text and position to send to the model
|
|
|
|
const doc_text = document.getText();
|
|
|
|
const doc_off = document.offsetAt(position);
|
|
|
|
let doc_before = doc_text.substring(0, doc_off);
|
|
|
|
let doc_after = doc_text.substring(doc_off);
|
|
|
|
|
|
|
|
// make it cleaner in hope to reduce the number of tokens
|
|
|
|
doc_before = clean_text(doc_before);
|
|
|
|
doc_after = clean_text(doc_after);
|
|
|
|
|
|
|
|
// TODO: prune text up to a maximum context length
|
|
|
|
|
|
|
|
// Prefix the filename in a comment
|
|
|
|
let pfx: string, sfx: string;
|
|
|
|
const lang = document.languageId;
|
|
|
|
const prefixes = commentPrefix;
|
|
|
|
pfx = (prefixes as any)[lang][0] as string;
|
|
|
|
sfx = (prefixes as any)[lang][1] as string;
|
|
|
|
// FIXME: is there a more efficient way?
|
|
|
|
doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
|
|
|
|
|
|
|
|
// server request object
|
|
|
|
const request: llamaCompletionRequest = {
|
|
|
|
n_predict: config.get("llamaMaxtokens") as number,
|
|
|
|
mirostat: config.get("llamaMirostat") as number,
|
|
|
|
repeat_penalty: config.get("llamaRepeatPenalty") as number,
|
|
|
|
frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
|
|
|
|
presence_penalty: config.get("llamaPresencePenalty,") as number,
|
|
|
|
repeat_last_n: config.get("llamaRepeatCtx,") as number,
|
|
|
|
temperature: config.get("llamaTemperature") as number,
|
|
|
|
top_p: config.get("llamaTop_p") as number,
|
|
|
|
top_k: config.get("llamaTop_k") as number,
|
|
|
|
typical_p: config.get("llamaTypical_p") as number,
|
|
|
|
tfs_z: config.get("llamaTailfree_z,") as number,
|
|
|
|
seed: config.get("llamaSeed") as number,
|
|
|
|
stream: false,
|
|
|
|
prompt: doc_before,
|
|
|
|
};
|
|
|
|
|
|
|
|
let data: llamaData;
|
|
|
|
// try to send the request to the running server
|
|
|
|
try {
|
|
|
|
const response_promise = fetch(
|
|
|
|
(config.get("llamaHost") as string).concat('/completion'),
|
|
|
|
{
|
|
|
|
method: 'POST',
|
|
|
|
headers: {
|
|
|
|
'content-type': 'application/json; charset=UTF-8'
|
|
|
|
},
|
|
|
|
body: JSON.stringify(request)
|
|
|
|
}
|
|
|
|
);
|
|
|
|
|
|
|
|
showPendingStatusBar("dumbpilot waiting", response_promise);
|
|
|
|
const response = await response_promise;
|
|
|
|
if (response.ok === false) {
|
|
|
|
throw new Error("llama server request is not ok??");
|
|
|
|
}
|
|
|
|
|
|
|
|
data = await response.json() as llamaData;
|
|
|
|
console.log(JSON.stringify(data));
|
|
|
|
|
|
|
|
} catch (e: any) {
|
|
|
|
const err = e as TypeError;
|
|
|
|
const cause: fetchErrorCause = err.cause as fetchErrorCause;
|
|
|
|
const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
|
|
|
|
// let the user know something went wrong
|
|
|
|
// TODO: maybe add a retry action or something
|
|
|
|
showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
|
|
|
|
return null;
|
|
|
|
};
|
|
|
|
|
|
|
|
result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
|
|
|
|
return result;
|
|
|
|
},
|
|
|
|
};
|
|
|
|
vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider);
|
|
|
|
}
|
|
|
|
|
|
|
|
// This method is called when your extension is deactivated.
// Intentionally empty: all cleanup is handled through context.subscriptions.
export function deactivate() {}
|