|
|
|
@ -29,7 +29,7 @@ type llamaData = { |
|
|
|
|
truncated: boolean |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
type llamaCompletionRequest = { |
|
|
|
|
type llamaRequest = { |
|
|
|
|
n_predict: number, |
|
|
|
|
mirostat: number, |
|
|
|
|
repeat_penalty: number, |
|
|
|
@ -43,45 +43,21 @@ type llamaCompletionRequest = { |
|
|
|
|
tfs_z: number,
|
|
|
|
|
seed: number, |
|
|
|
|
stream: boolean, |
|
|
|
|
prompt: string, |
|
|
|
|
cache_prompt: boolean, |
|
|
|
|
prompt?: string, |
|
|
|
|
input_prefix?: string, |
|
|
|
|
input_suffix?: string |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
type llamaFillRequest = { |
|
|
|
|
n_predict: number, |
|
|
|
|
mirostat: number, |
|
|
|
|
repeat_penalty: number, |
|
|
|
|
frequency_penalty: number,
|
|
|
|
|
presence_penalty: number,
|
|
|
|
|
repeat_last_n: number,
|
|
|
|
|
temperature: number, |
|
|
|
|
top_p: number, |
|
|
|
|
top_k: number, |
|
|
|
|
typical_p: number, |
|
|
|
|
tfs_z: number,
|
|
|
|
|
seed: number, |
|
|
|
|
stream: boolean, |
|
|
|
|
input_prefix: string, |
|
|
|
|
input_suffix: string, |
|
|
|
|
// Shape of the `cause` property attached to a failed fetch() TypeError
// by Node's undici (e.g. ECONNREFUSED when the llama server is down).
type fetchErrorCause = {
	errno: number,    // OS-level error number
	code: string,     // error code string, e.g. "ECONNREFUSED"
	syscall: string,  // failing syscall, e.g. "connect"
	address: string,  // remote address the request targeted
	port: number      // remote port the request targeted
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Default sampling / session parameters for the llama.cpp server.
// NOTE(review): the request-building code later in this file reads these
// values from the extension configuration (config.get("llama…")) instead;
// these constants look like legacy fallbacks — confirm they are still used.
const llama_ctxsize = 2048;            // context window size, in tokens
const llama_maxtokens = -1;            // -1 = predict until end-of-stream
const llama_mirostat = 0;             // 0 disables mirostat sampling
const llama_repeat_penalty = 1.11;    // penalty applied to repeated tokens
const llama_frequency_penalty = 0.0;  // OpenAI-style frequency penalty (off)
const llama_presence_penalty = 0.0;   // OpenAI-style presence penalty (off)
const llama_repeat_ctx = 256;         // how many recent tokens repeat_penalty considers
const llama_temperature = 0.25;       // low temperature → fairly deterministic output
const llama_top_p = 0.95;             // nucleus sampling cutoff
const llama_top_k = 40;               // top-k sampling cutoff
const llama_typical_p = 0.95;         // typical sampling parameter
const llama_tailfree_z = 0.5;         // tail-free sampling z parameter
const llama_session_seed = -1;        // -1 = random seed per request

// Base URL of the llama.cpp HTTP server.
// NOTE(review): 0.0.0.0 is a bind-address, not a connect-address; as a
// client target it usually works but 127.0.0.1 would be more correct —
// confirm before changing, since this is a runtime value.
const llama_host = "http://0.0.0.0:8080";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// clean up the document
|
|
|
|
|
function clean_text(txt: string): string { |
|
|
|
|
// these are already done by JSON.stringify()
|
|
|
|
@ -94,22 +70,54 @@ function clean_text(txt: string): string { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Show a message notification with a set timeout
|
|
|
|
|
async function showMessageWithTimeout(message: string, timeout: number): Promise<void> { |
|
|
|
|
void vscode.window.withProgress( |
|
|
|
|
{ |
|
|
|
|
location: vscode.ProgressLocation.Notification, |
|
|
|
|
title: message, |
|
|
|
|
cancellable: false, |
|
|
|
|
}, (progress, token) => { |
|
|
|
|
token.onCancellationRequested(() => {}); |
|
|
|
|
|
|
|
|
|
// This is magic I don't understand
|
|
|
|
|
const p = new Promise<void>((resolve) => { |
|
|
|
|
setTimeout(resolve, timeout); |
|
|
|
|
}); |
|
|
|
|
return p; |
|
|
|
|
}); |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// show a message on the status bar until the promise is resolved
|
|
|
|
|
async function showPendingStatusBar(message: string, operation: Promise<any>): Promise<void> { |
|
|
|
|
void vscode.window.withProgress( |
|
|
|
|
{ |
|
|
|
|
location: vscode.ProgressLocation.Window, |
|
|
|
|
title: message, |
|
|
|
|
}, () => operation ).then((aok) => {}, (err) => {}); |
|
|
|
|
// we already resolve the operation elsewhere
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export function activate(context: vscode.ExtensionContext) { |
|
|
|
|
|
|
|
|
|
console.log('dumbpilot is now active'); |
|
|
|
|
|
|
|
|
|
const config = vscode.workspace.getConfiguration("dumbpilot"); |
|
|
|
|
var completion_enabled: boolean = config.get("completionEnabled") as boolean; |
|
|
|
|
let config = vscode.workspace.getConfiguration("dumbpilot"); |
|
|
|
|
|
|
|
|
|
// handle completion changes
|
|
|
|
|
context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => { |
|
|
|
|
config = vscode.workspace.getConfiguration("dumbpilot"); |
|
|
|
|
})); |
|
|
|
|
|
|
|
|
|
// TODO: work with local configurations
|
|
|
|
|
let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => { |
|
|
|
|
completion_enabled = true; |
|
|
|
|
config.update("completionEnabled", true); |
|
|
|
|
}); |
|
|
|
|
context.subscriptions.push(disposable); |
|
|
|
|
|
|
|
|
|
disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => { |
|
|
|
|
completion_enabled = false; |
|
|
|
|
config.update("completionEnabled", false); |
|
|
|
|
}); |
|
|
|
|
|
|
|
|
@ -119,11 +127,12 @@ export function activate(context: vscode.ExtensionContext) { |
|
|
|
|
const provider: vscode.InlineCompletionItemProvider = { |
|
|
|
|
async provideInlineCompletionItems(document, position, context, token) { |
|
|
|
|
|
|
|
|
|
if (completion_enabled === false) { |
|
|
|
|
// disable if predictive completion is disabled
|
|
|
|
|
if (config.get("completionEnabled") as boolean === false) { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Since for every completion we want to query the server, we want to filter out
|
|
|
|
|
// Since for every completion we will query the server, we want to filter out
|
|
|
|
|
// automatic completion invokes
|
|
|
|
|
if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) { |
|
|
|
|
return null; |
|
|
|
@ -144,8 +153,8 @@ export function activate(context: vscode.ExtensionContext) { |
|
|
|
|
// Get the document's text and position to send to the model
|
|
|
|
|
const doc_text = document.getText(); |
|
|
|
|
const doc_off = document.offsetAt(position); |
|
|
|
|
var doc_before = doc_text.substring(0, doc_off); |
|
|
|
|
var doc_after = doc_text.substring(doc_off); |
|
|
|
|
let doc_before = doc_text.substring(0, doc_off); |
|
|
|
|
let doc_after = doc_text.substring(doc_off); |
|
|
|
|
|
|
|
|
|
// make it cleaner in hope to reduce the number of tokens
|
|
|
|
|
doc_before = clean_text(doc_before); |
|
|
|
@ -154,37 +163,48 @@ export function activate(context: vscode.ExtensionContext) { |
|
|
|
|
// TODO: prune text up to a maximum context length
|
|
|
|
|
|
|
|
|
|
// Prefix the filename in a comment
|
|
|
|
|
var pfx: string, sfx: string; |
|
|
|
|
let pfx: string, sfx: string; |
|
|
|
|
const fname = document.fileName.split('/').at(-1); |
|
|
|
|
const lang = document.languageId; |
|
|
|
|
const prefixes = commentPrefix; |
|
|
|
|
pfx = (prefixes as any)[lang][0] as string; |
|
|
|
|
sfx = (prefixes as any)[lang][1] as string; |
|
|
|
|
// FIXME: is there a more efficient way?
|
|
|
|
|
doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before; |
|
|
|
|
|
|
|
|
|
// server request object
|
|
|
|
|
const request: llamaCompletionRequest = { |
|
|
|
|
n_predict: llama_maxtokens, |
|
|
|
|
mirostat: llama_mirostat, |
|
|
|
|
repeat_penalty: llama_repeat_penalty, |
|
|
|
|
frequency_penalty: llama_frequency_penalty,
|
|
|
|
|
presence_penalty: llama_presence_penalty,
|
|
|
|
|
repeat_last_n: llama_repeat_ctx,
|
|
|
|
|
temperature: llama_temperature, |
|
|
|
|
top_p: llama_top_p, |
|
|
|
|
top_k: llama_top_k, |
|
|
|
|
typical_p: llama_typical_p, |
|
|
|
|
tfs_z: llama_tailfree_z,
|
|
|
|
|
seed: llama_session_seed, |
|
|
|
|
doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before; |
|
|
|
|
|
|
|
|
|
const fim = config.get("fimEnabled") as boolean; |
|
|
|
|
let req_str: string; |
|
|
|
|
let request: llamaRequest = { |
|
|
|
|
n_predict: config.get("llamaMaxtokens") as number, |
|
|
|
|
mirostat: config.get("llamaMirostat") as number, |
|
|
|
|
repeat_penalty: config.get("llamaRepeatPenalty") as number, |
|
|
|
|
frequency_penalty: config.get("llamaFrequencyPenalty,") as number, |
|
|
|
|
presence_penalty: config.get("llamaPresencePenalty,") as number, |
|
|
|
|
repeat_last_n: config.get("llamaRepeatCtx,") as number, |
|
|
|
|
temperature: config.get("llamaTemperature") as number, |
|
|
|
|
top_p: config.get("llamaTop_p") as number, |
|
|
|
|
top_k: config.get("llamaTop_k") as number, |
|
|
|
|
typical_p: config.get("llamaTypical_p") as number, |
|
|
|
|
tfs_z: config.get("llamaTailfree_z,") as number, |
|
|
|
|
seed: config.get("llamaSeed") as number, |
|
|
|
|
stream: false, |
|
|
|
|
prompt: doc_before, |
|
|
|
|
cache_prompt: config.get("llamaCachePrompt") as boolean |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
var data: llamaData; |
|
|
|
|
|
|
|
|
|
if (fim === true) { |
|
|
|
|
req_str = '/infill'; |
|
|
|
|
request.input_prefix = doc_before; |
|
|
|
|
request.input_suffix = doc_after; |
|
|
|
|
} else { |
|
|
|
|
req_str = '/completion'; |
|
|
|
|
request.prompt = doc_before; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
let data: llamaData; |
|
|
|
|
// try to send the request to the running server
|
|
|
|
|
try { |
|
|
|
|
const response = await fetch( |
|
|
|
|
llama_host.concat('/completion'), |
|
|
|
|
const response_promise = fetch( |
|
|
|
|
(config.get("llamaHost") as string).concat(req_str), |
|
|
|
|
{ |
|
|
|
|
method: 'POST', |
|
|
|
|
headers: { |
|
|
|
@ -193,14 +213,25 @@ export function activate(context: vscode.ExtensionContext) { |
|
|
|
|
body: JSON.stringify(request) |
|
|
|
|
} |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
showPendingStatusBar("dumbpilot waiting", response_promise); |
|
|
|
|
const response = await response_promise; |
|
|
|
|
if (response.ok === false) { |
|
|
|
|
throw new Error("llama server request is not ok??"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
data = await response.json() as llamaData; |
|
|
|
|
const gen_tokens = data.timings.predicted_n; |
|
|
|
|
const gen_time = (data.timings.predicted_ms / 1000).toFixed(2); |
|
|
|
|
showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500); |
|
|
|
|
|
|
|
|
|
} catch (e: any) { |
|
|
|
|
console.log('dumbpilot: ' + e.message); |
|
|
|
|
const err = e as TypeError; |
|
|
|
|
const cause: fetchErrorCause = err.cause as fetchErrorCause; |
|
|
|
|
const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port; |
|
|
|
|
// let the user know something went wrong
|
|
|
|
|
// TODO: maybe add a retry action or something
|
|
|
|
|
showMessageWithTimeout('dumbpilot error: ' + estr, 3000); |
|
|
|
|
return null; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|