Alessandro Mauri 12 months ago
parent 6bb3add1be
commit a599d44c10
1. package.json (7 changes)
2. src/extension.ts (54 changes)

@@ -153,7 +153,12 @@
 "dumbpilot.llamaTop_k": {"type": "number", "default": 40},
 "dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
 "dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-"dumbpilot.llamaSeed": {"type": "number", "default": -1}
+"dumbpilot.llamaSeed": {"type": "number", "default": -1},
+"dumbpilot.llamaCachePrompt": {
+	"type": "boolean",
+	"default": true,
+	"description": "Enable prompt caching for faster results"
+}
 }
 }
 },
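On the extension side, this contribution point is read like any other dumbpilot setting. A minimal standalone sketch of pulling it out of the workspace configuration (not code from this commit):

import * as vscode from 'vscode';

// Reads dumbpilot.llamaCachePrompt; falls back to the declared default of true.
const config = vscode.workspace.getConfiguration('dumbpilot');
const cachePrompt = config.get<boolean>('llamaCachePrompt', true);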

@@ -29,7 +29,7 @@ type llamaData = {
 	truncated: boolean
 };
 
-type llamaCompletionRequest = {
+type llamaRequest = {
 	n_predict: number,
 	mirostat: number,
 	repeat_penalty: number,
@@ -43,25 +43,10 @@ type llamaCompletionRequest = {
 	tfs_z: number,
 	seed: number,
 	stream: boolean,
-	prompt: string,
-};
-
-type llamaFillRequest = {
-	n_predict: number,
-	mirostat: number,
-	repeat_penalty: number,
-	frequency_penalty: number,
-	presence_penalty: number,
-	repeat_last_n: number,
-	temperature: number,
-	top_p: number,
-	top_k: number,
-	typical_p: number,
-	tfs_z: number,
-	seed: number,
-	stream: boolean,
-	input_prefix: string,
-	input_suffix: string
+	cache_prompt: boolean,
+	prompt?: string,
+	input_prefix?: string,
+	input_suffix?: string
 };
 
 type fetchErrorCause = {
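Collapsing llamaCompletionRequest and llamaFillRequest into one llamaRequest trades two near-identical shapes for optional fields: prompt is set for plain completion, input_prefix/input_suffix for fill-in-the-middle, and cache_prompt is now present on both. A sketch of the two shapes (sampling values here are illustrative placeholders):

// Shared sampling parameters; values are placeholders only.
const base = {
	n_predict: 128, mirostat: 0, repeat_penalty: 1.1,
	frequency_penalty: 0, presence_penalty: 0, repeat_last_n: 64,
	temperature: 0.7, top_p: 0.95, top_k: 40, typical_p: 0.95,
	tfs_z: 0.5, seed: -1, stream: false, cache_prompt: true,
};

// Plain completion: only `prompt` is set.
const completion: llamaRequest = { ...base, prompt: 'int main() {' };

// Fill-in-the-middle: prefix and suffix are set instead.
const infill: llamaRequest = { ...base, input_prefix: 'int main() {', input_suffix: '}' };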
@@ -118,7 +103,13 @@ export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
 
-	const config = vscode.workspace.getConfiguration("dumbpilot");
+	let config = vscode.workspace.getConfiguration("dumbpilot");
+
+	// handle configuration changes
+	context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
+		config = vscode.workspace.getConfiguration("dumbpilot");
+	}));
 
 	let completion_enabled: boolean = config.get("completionEnabled") as boolean;
 
 	// TODO: work with local configurations
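Re-reading the whole configuration on every change event works, but the event fires for unrelated settings too; the event argument can narrow it. A sketch with an affectsConfiguration guard (same vscode API as the commit, the guard is the only addition):

context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
	// Only reload when a dumbpilot.* setting actually changed.
	if (e.affectsConfiguration('dumbpilot')) {
		config = vscode.workspace.getConfiguration('dumbpilot');
	}
}));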
@@ -183,8 +174,9 @@ export function activate(context: vscode.ExtensionContext) {
 	// FIXME: is there a more efficient way?
 	doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
 
-	// server request object
-	const request: llamaCompletionRequest = {
+	const fim = config.get("fimEnabled") as boolean;
+	let req_str: string;
+	let request: llamaRequest = {
 		n_predict: config.get("llamaMaxtokens") as number,
 		mirostat: config.get("llamaMirostat") as number,
 		repeat_penalty: config.get("llamaRepeatPenalty") as number,
@ -198,14 +190,24 @@ export function activate(context: vscode.ExtensionContext) {
tfs_z: config.get("llamaTailfree_z,") as number, tfs_z: config.get("llamaTailfree_z,") as number,
seed: config.get("llamaSeed") as number, seed: config.get("llamaSeed") as number,
stream: false, stream: false,
prompt: doc_before, cache_prompt: config.get("llamaCachePrompt") as boolean
}; };
if (fim === true) {
req_str = '/infill';
request.input_prefix = doc_before;
request.input_suffix = doc_after;
} else {
req_str = '/completion';
request.prompt = doc_before;
}
console.log(fim);
let data: llamaData; let data: llamaData;
// try to send the request to the running server // try to send the request to the running server
try { try {
const response_promise = fetch( const response_promise = fetch(
(config.get("llamaHost") as string).concat('/completion'), (config.get("llamaHost") as string).concat(req_str),
{ {
method: 'POST', method: 'POST',
headers: { headers: {
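Taken together, the endpoint and payload are now chosen per request: /infill with input_prefix/input_suffix when fimEnabled is set, /completion with prompt otherwise. A standalone sketch of that round trip (assumes a llama.cpp server listening on http://127.0.0.1:8080 that returns the generated text in a `content` field; error handling omitted):

async function complete(before: string, after: string, fim: boolean): Promise<string> {
	const endpoint = fim ? '/infill' : '/completion';
	// Infill sends prefix/suffix; plain completion sends the prompt.
	const body = fim
		? { n_predict: 128, stream: false, cache_prompt: true, input_prefix: before, input_suffix: after }
		: { n_predict: 128, stream: false, cache_prompt: true, prompt: before };
	const response = await fetch('http://127.0.0.1:8080' + endpoint, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(body),
	});
	const data = await response.json() as { content: string };
	return data.content;
}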
