parent efb85c2cb4
commit 0a493294cf
@@ -0,0 +1,103 @@
import * as vscode from 'vscode';


// llama.cpp server response format
export type llamaData = {
    content: string,
    generation_settings: JSON,
    model: string,
    prompt: string,
    stopped_eos: boolean,
    stopped_limit: boolean,
    stopped_word: boolean,
    stopping_word: string,
    timings: {
        predicted_ms: number,
        predicted_n: number,
        predicted_per_second: number,
        predicted_per_token_ms: number,
        prompt_ms: number,
        prompt_n: number,
        prompt_per_second: number,
        prompt_per_token_ms: number
    },
    tokens_cached: number,
    tokens_evaluated: number,
    tokens_predicted: number,
    truncated: boolean
};

// llama.cpp server request format (used for both /completion and /infill)
export type llamaRequest = {
    n_predict: number,
    mirostat: number,
    repeat_penalty: number,
    frequency_penalty: number,
    presence_penalty: number,
    repeat_last_n: number,
    temperature: number,
    top_p: number,
    top_k: number,
    typical_p: number,
    tfs_z: number,
    seed: number,
    stream: boolean,
    cache_prompt: boolean,
    prompt?: string,
    input_prefix?: string,
    input_suffix?: string
};


// Build a request body for the llama.cpp server from the extension settings
// and the document text before/after the cursor.
export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest
{
    let request: llamaRequest = {
        n_predict: config.get("llamaMaxtokens") as number,
        mirostat: config.get("llamaMirostat") as number,
        repeat_penalty: config.get("llamaRepeatPenalty") as number,
        frequency_penalty: config.get("llamaFrequencyPenalty") as number,
        presence_penalty: config.get("llamaPresencePenalty") as number,
        repeat_last_n: config.get("llamaRepeatCtx") as number,
        temperature: config.get("llamaTemperature") as number,
        top_p: config.get("llamaTop_p") as number,
        top_k: config.get("llamaTop_k") as number,
        typical_p: config.get("llamaTypical_p") as number,
        tfs_z: config.get("llamaTailfree_z") as number,
        seed: config.get("llamaSeed") as number,
        stream: false,
        cache_prompt: config.get("llamaCachePrompt") as boolean
    };

    const fim = config.get("fimEnabled") as boolean;
    const fimRequest = config.get("useFillInMiddleRequest") as boolean;

    if (fim === true) {
        if (fimRequest === true) {
            // Native fill-in-middle request: the server assembles the prompt from prefix/suffix.
            request.input_prefix = doc_before;
            request.input_suffix = doc_after;
        } else {
            // Manual fill-in-middle: build the prompt with the configured FIM strings.
            const fim_beg = config.get("fimBeginString") as string;
            const fim_hole = config.get("fimHoleString") as string;
            const fim_end = config.get("fimEndString") as string;
            request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
        }
    } else {
        // Plain completion: only the text before the cursor is sent.
        request.prompt = doc_before;
    }

    return request;
}
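
// Illustration (not from the original commit): with hypothetical CodeLlama-style
// settings such as fimBeginString = "<PRE> ", fimHoleString = " <SUF>" and
// fimEndString = " <MID>", the manual branch above would send a prompt like
//   "<PRE> <text before cursor> <SUF><text after cursor> <MID>"
// to the /completion endpoint, while the native branch leaves that assembly to /infill.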

// Choose the server endpoint: /infill when the native fill-in-middle request
// is used, /completion otherwise.
export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string
{
    const fim = config.get("fimEnabled") as boolean;
    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
    let req_str: string = config.get("llamaHost") as string;

    if (fim === true && fimRequest === true) {
        req_str += '/infill';
    } else {
        req_str += '/completion';
    }

    return req_str;
}
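
// --- Usage sketch (not part of this commit) ---
// A minimal example of how the two exports above could be wired together to query
// a llama.cpp server. The configuration section name "llamacpp" and the use of a
// global fetch (Node 18+) are assumptions for illustration only.
export async function exampleFetchCompletion(doc_before: string, doc_after: string): Promise<string> {
    const config = vscode.workspace.getConfiguration("llamacpp");   // hypothetical section name
    const endpoint = llamacppRequestEndpoint(config);               // e.g. "http://localhost:8080/completion"
    const body = createLlamacppRequest(config, doc_before, doc_after);

    const response = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body)
    });
    const data = await response.json() as llamaData;
    return data.content;
}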