@@ -4,7 +4,7 @@ import commentPrefix from './comments.json';

// llama.cpp server response format
type llama_data = {
type llamaData = {
	content: string,
	generation_settings: JSON,
	model: string,
@@ -27,7 +27,41 @@ type llama_data = {
	tokens_evaluated: number,
	tokens_predicted: number,
	truncated: boolean
};

type llamaCompletionRequest = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	prompt: string,
};

type llamaFillRequest = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	input_prefix: string,
	input_suffix: string,
};
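
The two request types above mirror the JSON bodies the llama.cpp example server accepts: llamaCompletionRequest for plain continuation and llamaFillRequest for fill-in-the-middle, where the text around the cursor travels as input_prefix and input_suffix. A minimal sketch of one completion round trip follows, assuming the server's POST /completion endpoint and the llama_host constant declared further down; the sketchCompletion helper and the concrete sampling values are illustrative assumptions, not part of this change.

// Sketch only: post a llamaCompletionRequest and read the reply back as llamaData.
async function sketchCompletion(prompt: string): Promise<string> {
	// Illustrative sampling values; the extension fills these from its llama_* constants.
	const req: llamaCompletionRequest = {
		n_predict: 128,
		mirostat: 0,
		repeat_penalty: 1.1,
		frequency_penalty: 0.0,
		presence_penalty: 0.0,
		repeat_last_n: 64,
		temperature: 0.7,
		top_p: 0.95,
		top_k: 40,
		typical_p: 1.0,
		tfs_z: 1.0,
		seed: -1,
		stream: false,
		prompt: prompt,
	};
	// Send the request as JSON and parse the server response.
	const response = await fetch(llama_host + "/completion", {
		method: "POST",
		headers: { "Content-Type": "application/json" },
		body: JSON.stringify(req),
	});
	if (!response.ok) {
		throw new Error("llama server request failed: " + response.status);
	}
	const data = await response.json() as llamaData;
	return data.content;
}

The extension itself builds the same object from its llama_* constants and the current document text, as the later hunks show.
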
@@ -48,18 +82,49 @@ const llama_session_seed = -1;
const llama_host = "http://0.0.0.0:8080";

export function activate(cotext: vscode.ExtensionContext) {

// clean up the document
function clean_text(txt: string): string {
	// these are already done by JSON.stringify()
	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
	//txt = txt.replace((/\t/gm, "\\t"));

	// FIXME: I don't know if this penalizes some results since most people indent with spaces
	//txt = txt.replace(/\s+/gm, " ");
	return txt;
}

export function activate(context: vscode.ExtensionContext) {

	console.log('dumbpilot is now active');

	const config = vscode.workspace.getConfiguration("dumbpilot");
	var completion_enabled: boolean = config.get("completionEnabled") as boolean;

	// TODO: work with local configurations
	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
		completion_enabled = true;
		config.update("completionEnabled", true);
	});
	context.subscriptions.push(disposable);

	disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
		completion_enabled = false;
		config.update("completionEnabled", false);
	});

	// Register a new provider of inline completions, this does not decide how it is invoked
	// only what the completion should be
	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
	const provider: vscode.InlineCompletionItemProvider = {
		async provideInlineCompletionItems(document, position, context, token) {

			if (completion_enabled === false) {
				return null;
			}

			// Since for every completion we want to query the server, we want to filter out
			// accidental automatic completion invokes
			// automatic completion invokes
			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
				return null;
			}
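
In the hunk below, the prompt gets the file name prepended inside a language-appropriate comment, with the prefix and suffix taken from comments.json through the document's languageId. That file is not part of this diff; judging from the [lang][0] and [lang][1] lookups below, its shape is presumably a map from languageId to a [prefix, suffix] pair, roughly like this illustrative sketch:

// Assumed shape of comments.json (the file is not shown here); entries are examples only.
// Line-comment languages leave the suffix empty, block-comment languages fill both slots.
const exampleCommentPrefix: { [languageId: string]: [string, string] } = {
	typescript: ["//", ""],
	python: ["#", ""],
	c: ["/*", "*/"],
};

With a layout like that, a TypeScript document named extension.ts would start its prompt with the line "// extension.ts", matching the doc_before concatenation in the hunk below.
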
@@ -82,27 +147,23 @@ export function activate(cotext: vscode.ExtensionContext) {
			var doc_before = doc_text.substring(0, doc_off);
			var doc_after = doc_text.substring(doc_off);

			// clean up the document, stuff commented is already done by JSON.stringify()
			//doc_before = doc_before.replace(/(\r\n|\n|\r)/gm, "\\n");
			//doc_before = doc_before.replace(/\t/gm, "\\t");
			//doc_after = doc_before.replace(/(\r\n|\n|\r)/gm, "\\n");
			//doc_after = doc_before.replace(/\t/gm, "\\t");

			// FIXME: I don't know if this penalizes some results since most people indent with spaces
			//doc_before = doc_before.replace(/\s+/gm, " ");
			//doc_after = doc_before.replace(/\s+/gm, " ");

			// make it cleaner in hope to reduce the number of tokens
			doc_before = clean_text(doc_before);
			doc_after = clean_text(doc_after);

			// TODO: prune text up to a maximum context length

			// prefix commented filename, is this the best way?
			var pfx: string = '';
			var sfx: string = '';
			// Prefix the filename in a comment
			var pfx: string, sfx: string;
			const lang = document.languageId;
			const prefixes = commentPrefix;
			pfx = (prefixes as any)[lang][0] as string;
			sfx = (prefixes as any)[lang][1] as string;
			// FIXME: is there a more efficient way?
			doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;

			// server request object
			const request = {
			const request: llamaCompletionRequest = {
				n_predict: llama_maxtokens,
				mirostat: llama_mirostat,
				repeat_penalty: llama_repeat_penalty,
@@ -115,12 +176,11 @@ export function activate(cotext: vscode.ExtensionContext) {
				typical_p: llama_typical_p,
				tfs_z: llama_tailfree_z,
				seed: llama_session_seed,
				// input_prefix: doc_before,
				// input_suffix: doc_after,
				stream: false,
				prompt: doc_before,
			};

			var data: llama_data;
			var data: llamaData;
			// try to send the request to the running server
			try {
				const response = await fetch(
@@ -137,7 +197,7 @@ export function activate(cotext: vscode.ExtensionContext) {
					throw new Error("llama server request is not ok??");
				}

				data = await response.json() as llama_data;
				data = await response.json() as llamaData;

			} catch (e: any) {
				console.log('dumbpilot: ' + e.message);