You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
164 lines
4.6 KiB
164 lines
4.6 KiB
12 months ago
|
import { ok } from 'assert';
|
||
|
import * as vscode from 'vscode';
|
||
|
|
||
|
|
||
|
/**
 * llama.cpp server response format.
 *
 * Describes the JSON body this extension expects back from the server's
 * `/completion` endpoint. Of these fields, the completion provider in this
 * file only reads `content`; the remaining fields mirror what the server
 * reports and are kept for completeness.
 */
type llama_data = {
    /** Generated text; inserted verbatim as the inline completion. */
    content: string,
    /**
     * Settings the server echoes back for the request.
     * Typed as an open key/value map: the previous `JSON` annotation referred
     * to the type of the global `JSON` utility object (`parse`/`stringify`),
     * not to "arbitrary JSON data".
     */
    generation_settings: Record<string, unknown>,
    model: string,
    prompt: string,
    stopped_eos: boolean,
    stopped_limit: boolean,
    stopped_word: boolean,
    stopping_word: string,
    /** Timing statistics reported by the server (units as implied by the field names). */
    timings: {
        predicted_ms: number,
        predicted_n: number,
        predicted_per_second: number,
        predicted_per_token_ms: number,
        prompt_ms: number,
        prompt_n: number,
        prompt_per_second: number,
        prompt_per_token_ms: number
    },
    tokens_cached: number,
    tokens_evaluated: number,
    tokens_predicted: number,
    truncated: boolean
};
|
||
|
|
||
|
|
||
|
// Generation parameters. Except for llama_ctxsize, each value is forwarded
// unchanged to the llama.cpp server in the body of every /completion request;
// the request field it is sent as is noted next to it.
const llama_ctxsize = 2048;            // not referenced by the request built in this file
const llama_maxtokens = -1;            // sent as `n_predict`
const llama_mirostat = 0;              // sent as `mirostat`
const llama_repeat_penalty = 1.11;     // sent as `repeat_penalty`
const llama_frequency_penalty = 0.0;   // sent as `frequency_penalty`
const llama_presence_penalty = 0.0;    // sent as `presence_penalty`
const llama_repeat_ctx = 256;          // sent as `repeat_last_n`
const llama_temperature = 0.25;        // sent as `temperature`
const llama_top_p = 0.95;              // sent as `top_p`
const llama_top_k = 40;                // sent as `top_k`
const llama_typical_p = 0.95;          // sent as `typical_p`
const llama_tailfree_z = 0.5;          // sent as `tfs_z`
const llama_session_seed = -1;         // sent as `seed`

// Base URL of the llama.cpp server; '/completion' is appended when querying.
// Uses the loopback address: 0.0.0.0 is a wildcard *bind* address and is not a
// portable destination for outgoing connections (it is rejected on Windows).
// A server listening on 0.0.0.0 is still reachable through 127.0.0.1.
const llama_host = "http://127.0.0.1:8080";
|
||
|
|
||
|
|
||
|
/**
 * Returns the line-comment token used to prefix the file name in the prompt.
 *
 * Keys are VS Code language identifiers (e.g. 'typescript', 'python',
 * 'shellscript'). The short names the original code tested ('js', 'ts', 'py',
 * 'sh', 'bash', 'zsh') are kept as harmless aliases.
 *
 * @param language_id `document.languageId` of the document being completed.
 * @returns '//' or '#', or an empty string when the language is not recognised.
 */
function comment_prefix_for(language_id: string): string {
    switch (language_id) {
        case 'c':
        case 'cpp':
        case 'javascript':
        case 'javascriptreact':
        case 'typescript':
        case 'typescriptreact':
        case 'js':
        case 'ts':
            return '//';
        case 'shellscript':
        case 'python':
        case 'sh':
        case 'bash':
        case 'zsh':
        case 'py':
            return '#';
        default:
            return '';
    }
}

/**
 * Extension entry point: registers an inline completion provider that asks a
 * llama.cpp server to continue the text before the cursor.
 *
 * The provider only answers explicitly invoked completions, because every
 * completion costs a server round trip.
 *
 * @param context Extension context; the provider registration is added to its
 *                `subscriptions` so it is disposed when the extension unloads.
 */
export function activate(context: vscode.ExtensionContext): void {

    console.log('dumbpilot is now active');

    // Register a new provider of inline completions, this does not decide how it is invoked
    // only what the completion should be
    // https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
    const provider: vscode.InlineCompletionItemProvider = {
        async provideInlineCompletionItems(document, position, inline_context, token) {

            // Since for every completion we want to query the server, we want to filter out
            // accidental automatic completion invokes
            if (inline_context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
                return null;
            }

            // Nothing to do if the request was cancelled before we even started.
            if (token.isCancellationRequested) {
                return null;
            }

            // Tie the editor's cancellation token to the HTTP request: returning a
            // value from the cancellation listener has no effect, so the request
            // has to be aborted explicitly.
            const abort = new AbortController();
            const cancel_listener = token.onCancellationRequested(() => {
                console.log("dumbpilot: operation cancelled, may still be running on the server");
                abort.abort();
            });

            // Only the text before the cursor is sent to the model.
            // JSON.stringify() takes care of escaping newlines and tabs.
            const doc_off = document.offsetAt(position);
            let doc_before = document.getText().substring(0, doc_off);

            // Prefix the prompt with the file name as a comment. When the language
            // has no known comment token the header is skipped, so that no
            // uncommented line is injected into the prompt.
            const comment_prefix = comment_prefix_for(document.languageId);
            if (comment_prefix !== '') {
                doc_before = comment_prefix + ' ' + document.fileName + '\n' + doc_before;
            }

            // server request object
            const request = {
                n_predict: llama_maxtokens,
                mirostat: llama_mirostat,
                repeat_penalty: llama_repeat_penalty,
                frequency_penalty: llama_frequency_penalty,
                presence_penalty: llama_presence_penalty,
                repeat_last_n: llama_repeat_ctx,
                temperature: llama_temperature,
                top_p: llama_top_p,
                top_k: llama_top_k,
                typical_p: llama_typical_p,
                tfs_z: llama_tailfree_z,
                seed: llama_session_seed,
                prompt: doc_before,
            };

            let data: llama_data;
            // try to send the request to the running server
            try {
                const response = await fetch(
                    llama_host.concat('/completion'),
                    {
                        method: 'POST',
                        headers: {
                            'content-type': 'application/json; charset=UTF-8'
                        },
                        body: JSON.stringify(request),
                        signal: abort.signal
                    }
                );
                if (!response.ok) {
                    throw new Error("llama server request is not ok?? (HTTP " + response.status + ")");
                }

                data = await response.json() as llama_data;
            } catch (e: unknown) {
                // An aborted fetch rejects as well; the cancellation was already logged.
                if (!token.isCancellationRequested) {
                    const message = e instanceof Error ? e.message : String(e);
                    console.log('dumbpilot: ' + message);
                }
                return null;
            } finally {
                // The listener is only needed while the request is in flight.
                cancel_listener.dispose();
            }

            // The response body is unvalidated JSON: make sure there is text to insert.
            if (token.isCancellationRequested || typeof data.content !== 'string') {
                return null;
            }

            const result: vscode.InlineCompletionList = {
                items: [{insertText: data.content, range: new vscode.Range(position, position)}]
            };
            return result;
        },
    };

    // Keep the registration's disposable so the provider is removed on deactivation.
    context.subscriptions.push(
        vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider)
    );
}
|
||
|
|
||
|
/**
 * Called when the extension is deactivated.
 * Currently performs no work.
 */
export function deactivate(): void {
    return;
}
|