partial move to file
parent efb85c2cb4
commit 0a493294cf
@@ -1,54 +1,9 @@
 import { ok } from 'assert';
 import * as vscode from 'vscode';
 import commentPrefix from './comments.json';
+import {createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint} from './llamacpp-api';
 
 
-// llama.cpp server response format
-type llamaData = {
-    content: string,
-    generation_settings: JSON,
-    model: string,
-    prompt: string,
-    stopped_eos: boolean,
-    stopped_limit: boolean,
-    stopped_word: boolean,
-    stopping_word: string,
-    timings: {
-        predicted_ms: number,
-        predicted_n: number,
-        predicted_per_second: number,
-        predicted_per_token_ms: number,
-        prompt_ms: number,
-        prompt_n: number,
-        prompt_per_second: number,
-        prompt_per_token_ms: number
-    },
-    tokens_cached: number,
-    tokens_evaluated: number,
-    tokens_predicted: number,
-    truncated: boolean
-};
-
-type llamaRequest = {
-    n_predict: number,
-    mirostat: number,
-    repeat_penalty: number,
-    frequency_penalty: number,
-    presence_penalty: number,
-    repeat_last_n: number,
-    temperature: number,
-    top_p: number,
-    top_k: number,
-    typical_p: number,
-    tfs_z: number,
-    seed: number,
-    stream: boolean,
-    cache_prompt: boolean,
-    prompt?: string,
-    input_prefix?: string,
-    input_suffix?: string
-};
-
 type fetchErrorCause = {
     errno: number,
     code: string,
@@ -172,51 +127,14 @@ export function activate(context: vscode.ExtensionContext) {
         // FIXME: is there a more efficient way?
         doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
 
-        const fim = config.get("fimEnabled") as boolean;
-        const fimRequest = config.get("useFillInMiddleRequest") as boolean;
-        let req_str: string;
-        let request: llamaRequest = {
-            n_predict: config.get("llamaMaxtokens") as number,
-            mirostat: config.get("llamaMirostat") as number,
-            repeat_penalty: config.get("llamaRepeatPenalty") as number,
-            frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
-            presence_penalty: config.get("llamaPresencePenalty,") as number,
-            repeat_last_n: config.get("llamaRepeatCtx,") as number,
-            temperature: config.get("llamaTemperature") as number,
-            top_p: config.get("llamaTop_p") as number,
-            top_k: config.get("llamaTop_k") as number,
-            typical_p: config.get("llamaTypical_p") as number,
-            tfs_z: config.get("llamaTailfree_z,") as number,
-            seed: config.get("llamaSeed") as number,
-            stream: false,
-            cache_prompt: config.get("llamaCachePrompt") as boolean
-        };
-
-        // check if fill in middle is enabled and fill the request prompt accordingly
-        if (fim === true) {
-            if (fimRequest === true) {
-                req_str = '/infill';
-                request.input_prefix = doc_before;
-                request.input_suffix = doc_after;
-            } else {
-                const fim_beg = config.get("fimBeginString") as string;
-                const fim_hole = config.get("fimHoleString") as string;
-                const fim_end = config.get("fimEndString") as string;
-                req_str = '/completion';
-                request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
-            }
-        } else {
-            req_str = '/completion';
-            request.prompt = doc_before;
-        }
-
+        const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
         console.log(JSON.stringify(request));
 
         let data: llamaData;
         // try to send the request to the running server
         try {
             const response_promise = fetch(
-                (config.get("llamaHost") as string).concat(req_str),
+                llamacppRequestEndpoint(config),
                 {
                     method: 'POST',
                     headers: {
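
Editor's note: for orientation, here is a minimal sketch (not part of the commit) of how the refactored call site fits together after this change. The hunk above ends at the `headers: {` context line, so the Content-Type header, the JSON body, and the `requestCompletion` wrapper name below are illustrative assumptions, not something this diff shows.

import * as vscode from 'vscode';
import { createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint } from './llamacpp-api';

async function requestCompletion(
    config: vscode.WorkspaceConfiguration,
    doc_before: string,
    doc_after: string
): Promise<llamaData> {
    // Build the request body from the extension settings (logic now lives in llamacpp-api.ts).
    const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
    // POST it to either <llamaHost>/infill or <llamaHost>/completion.
    const response = await fetch(llamacppRequestEndpoint(config), {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },  // assumed, not shown in the diff
        body: JSON.stringify(request)                      // assumed, not shown in the diff
    });
    return await response.json() as llamaData;
}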
src/llamacpp-api.ts (new file, 103 lines)
@@ -0,0 +1,103 @@
+import * as vscode from 'vscode';
+
+
+// llama.cpp server response format
+export type llamaData = {
+    content: string,
+    generation_settings: JSON,
+    model: string,
+    prompt: string,
+    stopped_eos: boolean,
+    stopped_limit: boolean,
+    stopped_word: boolean,
+    stopping_word: string,
+    timings: {
+        predicted_ms: number,
+        predicted_n: number,
+        predicted_per_second: number,
+        predicted_per_token_ms: number,
+        prompt_ms: number,
+        prompt_n: number,
+        prompt_per_second: number,
+        prompt_per_token_ms: number
+    },
+    tokens_cached: number,
+    tokens_evaluated: number,
+    tokens_predicted: number,
+    truncated: boolean
+};
+
+export type llamaRequest = {
+    n_predict: number,
+    mirostat: number,
+    repeat_penalty: number,
+    frequency_penalty: number,
+    presence_penalty: number,
+    repeat_last_n: number,
+    temperature: number,
+    top_p: number,
+    top_k: number,
+    typical_p: number,
+    tfs_z: number,
+    seed: number,
+    stream: boolean,
+    cache_prompt: boolean,
+    prompt?: string,
+    input_prefix?: string,
+    input_suffix?: string
+};
+
+
+export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest
+{
+    let request: llamaRequest = {
+        n_predict: config.get("llamaMaxtokens") as number,
+        mirostat: config.get("llamaMirostat") as number,
+        repeat_penalty: config.get("llamaRepeatPenalty") as number,
+        frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
+        presence_penalty: config.get("llamaPresencePenalty,") as number,
+        repeat_last_n: config.get("llamaRepeatCtx,") as number,
+        temperature: config.get("llamaTemperature") as number,
+        top_p: config.get("llamaTop_p") as number,
+        top_k: config.get("llamaTop_k") as number,
+        typical_p: config.get("llamaTypical_p") as number,
+        tfs_z: config.get("llamaTailfree_z,") as number,
+        seed: config.get("llamaSeed") as number,
+        stream: false,
+        cache_prompt: config.get("llamaCachePrompt") as boolean
+    };
+
+    const fim = config.get("fimEnabled") as boolean;
+    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
+
+    if (fim === true) {
+        if (fimRequest === true) {
+            request.input_prefix = doc_before;
+            request.input_suffix = doc_after;
+        } else {
+            const fim_beg = config.get("fimBeginString") as string;
+            const fim_hole = config.get("fimHoleString") as string;
+            const fim_end = config.get("fimEndString") as string;
+            request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+        }
+    } else {
+        request.prompt = doc_before;
+    }
+
+    return request;
+}
+
+export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string
+{
+    const fim = config.get("fimEnabled") as boolean;
+    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
+    let req_str: string = config.get("llamaHost") as string;
+
+    if (fim === true && fimRequest === true) {
+        req_str += '/infill';
+    } else {
+        req_str += '/completion';
+    }
+
+    return req_str;
+}
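
Editor's note: a short usage sketch of the new endpoint helper (not part of the commit). The configuration section name "llamacpp" and the host value are assumed example values, not taken from this diff.

import * as vscode from 'vscode';
import { llamacppRequestEndpoint } from './llamacpp-api';

// Assumed example: the extension's settings section is "llamacpp" and
// "llamaHost" is set to "http://127.0.0.1:8080".
//   fimEnabled && useFillInMiddleRequest  -> "http://127.0.0.1:8080/infill"
//   any other combination                 -> "http://127.0.0.1:8080/completion"
const config = vscode.workspace.getConfiguration('llamacpp');
const url: string = llamacppRequestEndpoint(config);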
src/openai-api.ts (new file, 0 lines)