partial move to file

master
Alessandro Mauri 11 months ago
parent efb85c2cb4
commit 0a493294cf
  1. 88
      src/extension.ts
  2. 103
      src/llamacpp-api.ts
  3. 0
      src/openai-api.ts

@ -1,54 +1,9 @@
import { ok } from 'assert';
import * as vscode from 'vscode';
import commentPrefix from './comments.json';
import {createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint} from './llamacpp-api';
/**
 * llama.cpp server response format.
 *
 * Only the shape is described here; nothing in this declaration validates
 * what the server actually sends.
 */
type llamaData = {
  content: string;
  // NOTE(review): `JSON` is the type of the global `JSON` object
  // (parse/stringify), not "any JSON value" - confirm this is intended.
  generation_settings: JSON;
  model: string;
  prompt: string;

  // stop-related fields
  stopped_eos: boolean;
  stopped_limit: boolean;
  stopped_word: boolean;
  stopping_word: string;

  // timing figures, split into "predicted" and "prompt" groups
  timings: {
    predicted_ms: number;
    predicted_n: number;
    predicted_per_second: number;
    predicted_per_token_ms: number;
    prompt_ms: number;
    prompt_n: number;
    prompt_per_second: number;
    prompt_per_token_ms: number;
  };

  // token counters
  tokens_cached: number;
  tokens_evaluated: number;
  tokens_predicted: number;
  truncated: boolean;
};
/**
 * Body of a request sent to the llama.cpp server.
 *
 * The numeric and boolean fields are always present. The three optional
 * string fields carry the text: either `prompt`, or the
 * `input_prefix` / `input_suffix` pair.
 */
type llamaRequest = {
  // generation parameters
  n_predict: number;
  mirostat: number;
  repeat_penalty: number;
  frequency_penalty: number;
  presence_penalty: number;
  repeat_last_n: number;
  temperature: number;
  top_p: number;
  top_k: number;
  typical_p: number;
  tfs_z: number;
  seed: number;

  // transport / caching switches
  stream: boolean;
  cache_prompt: boolean;

  // text payload (optional members)
  prompt?: string;
  input_prefix?: string;
  input_suffix?: string;
};
type fetchErrorCause = {
errno: number,
code: string,
@ -172,51 +127,14 @@ export function activate(context: vscode.ExtensionContext) {
// FIXME: is there a more efficient way?
doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
const fim = config.get("fimEnabled") as boolean;
const fimRequest = config.get("useFillInMiddleRequest") as boolean;
let req_str: string;
let request: llamaRequest = {
n_predict: config.get("llamaMaxtokens") as number,
mirostat: config.get("llamaMirostat") as number,
repeat_penalty: config.get("llamaRepeatPenalty") as number,
frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
presence_penalty: config.get("llamaPresencePenalty,") as number,
repeat_last_n: config.get("llamaRepeatCtx,") as number,
temperature: config.get("llamaTemperature") as number,
top_p: config.get("llamaTop_p") as number,
top_k: config.get("llamaTop_k") as number,
typical_p: config.get("llamaTypical_p") as number,
tfs_z: config.get("llamaTailfree_z,") as number,
seed: config.get("llamaSeed") as number,
stream: false,
cache_prompt: config.get("llamaCachePrompt") as boolean
};
// check if fill in middle is enabled and fill the request prompt accordingly
if (fim === true) {
if (fimRequest === true) {
req_str = '/infill';
request.input_prefix = doc_before;
request.input_suffix = doc_after;
} else {
const fim_beg = config.get("fimBeginString") as string;
const fim_hole = config.get("fimHoleString") as string;
const fim_end = config.get("fimEndString") as string;
req_str = '/completion';
request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
}
} else {
req_str = '/completion';
request.prompt = doc_before;
}
const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
console.log(JSON.stringify(request));
let data: llamaData;
// try to send the request to the running server
try {
const response_promise = fetch(
(config.get("llamaHost") as string).concat(req_str),
llamacppRequestEndpoint(config),
{
method: 'POST',
headers: {

@ -0,0 +1,103 @@
import * as vscode from 'vscode';
/**
 * llama.cpp server response format.
 *
 * Only the shape is described here; nothing in this declaration validates
 * what the server actually sends.
 */
export type llamaData = {
  content: string;
  // NOTE(review): `JSON` is the type of the global `JSON` object
  // (parse/stringify), not "any JSON value" - confirm this is intended.
  generation_settings: JSON;
  model: string;
  prompt: string;

  // stop-related fields
  stopped_eos: boolean;
  stopped_limit: boolean;
  stopped_word: boolean;
  stopping_word: string;

  // timing figures, split into "predicted" and "prompt" groups
  timings: {
    predicted_ms: number;
    predicted_n: number;
    predicted_per_second: number;
    predicted_per_token_ms: number;
    prompt_ms: number;
    prompt_n: number;
    prompt_per_second: number;
    prompt_per_token_ms: number;
  };

  // token counters
  tokens_cached: number;
  tokens_evaluated: number;
  tokens_predicted: number;
  truncated: boolean;
};
/**
 * Body of a request sent to the llama.cpp server.
 *
 * The numeric and boolean fields are always present. The three optional
 * string fields carry the text: either `prompt`, or the
 * `input_prefix` / `input_suffix` pair.
 */
export type llamaRequest = {
  // generation parameters
  n_predict: number;
  mirostat: number;
  repeat_penalty: number;
  frequency_penalty: number;
  presence_penalty: number;
  repeat_last_n: number;
  temperature: number;
  top_p: number;
  top_k: number;
  typical_p: number;
  tfs_z: number;
  seed: number;

  // transport / caching switches
  stream: boolean;
  cache_prompt: boolean;

  // text payload (optional members)
  prompt?: string;
  input_prefix?: string;
  input_suffix?: string;
};
/**
 * Builds the request body for the llama.cpp server from the extension
 * settings and the two pieces of document text.
 *
 * Text placement depends on two boolean settings:
 * - `fimEnabled` and `useFillInMiddleRequest` both true: the text is sent as
 *   `input_prefix` / `input_suffix` and `prompt` is left unset;
 * - only `fimEnabled` true: a single `prompt` is assembled as
 *   `fimBeginString + doc_before + fimHoleString + doc_after + fimEndString`;
 * - otherwise: `prompt` is just `doc_before` and `doc_after` is ignored.
 *
 * @param config Settings object the `llama*` / `fim*` values are read from.
 * @param doc_before Text that goes before the hole (or the whole prompt).
 * @param doc_after Text that goes after the hole; unused when FIM is off.
 * @returns A request with `stream` always set to `false`.
 */
export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest
{
  // Setting names must match exactly: the previous lookups for the frequency
  // penalty, presence penalty, repeat context and tail-free z had a stray ','
  // inside the key string, so they never matched a setting and read undefined.
  const request: llamaRequest = {
    n_predict: config.get("llamaMaxtokens") as number,
    mirostat: config.get("llamaMirostat") as number,
    repeat_penalty: config.get("llamaRepeatPenalty") as number,
    frequency_penalty: config.get("llamaFrequencyPenalty") as number,
    presence_penalty: config.get("llamaPresencePenalty") as number,
    repeat_last_n: config.get("llamaRepeatCtx") as number,
    temperature: config.get("llamaTemperature") as number,
    top_p: config.get("llamaTop_p") as number,
    top_k: config.get("llamaTop_k") as number,
    typical_p: config.get("llamaTypical_p") as number,
    tfs_z: config.get("llamaTailfree_z") as number,
    seed: config.get("llamaSeed") as number,
    stream: false,
    cache_prompt: config.get("llamaCachePrompt") as boolean
  };

  const fim = config.get("fimEnabled") as boolean;
  const fimRequest = config.get("useFillInMiddleRequest") as boolean;

  // Strict `=== true` checks: an unset (undefined) setting counts as disabled.
  if (fim === true) {
    if (fimRequest === true) {
      // Prefix and suffix are sent as separate fields.
      request.input_prefix = doc_before;
      request.input_suffix = doc_after;
    } else {
      // Build the fill-in-middle prompt by hand from the configured
      // marker strings.
      const fim_beg = config.get("fimBeginString") as string;
      const fim_hole = config.get("fimHoleString") as string;
      const fim_end = config.get("fimEndString") as string;
      request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
    }
  } else {
    request.prompt = doc_before;
  }

  return request;
}
/**
 * Returns the URL the request should be sent to: the `llamaHost` setting
 * followed by `/infill` when both `fimEnabled` and `useFillInMiddleRequest`
 * are exactly `true`, and by `/completion` in every other case.
 *
 * @param config Settings object the host and the two FIM flags are read from.
 * @returns Host string with the endpoint path appended (no slash handling).
 */
export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string
{
  const fimEnabled = config.get("fimEnabled") as boolean;
  const infillRequested = config.get("useFillInMiddleRequest") as boolean;
  const host = config.get("llamaHost") as string;

  const useInfill = fimEnabled === true && infillRequested === true;
  return host + (useInfill ? '/infill' : '/completion');
}
Loading…
Cancel
Save