From 0a493294cf896060d9dc5986f796e974df17d847 Mon Sep 17 00:00:00 2001
From: Alessandro Mauri
Date: Thu, 14 Dec 2023 21:45:01 +0100
Subject: [PATCH] partial move to file

---
 src/extension.ts    |  88 ++-----------------------------------
 src/llamacpp-api.ts | 103 ++++++++++++++++++++++++++++++++++++++++++++
 src/openai-api.ts   |   0
 3 files changed, 106 insertions(+), 85 deletions(-)
 create mode 100644 src/llamacpp-api.ts
 create mode 100644 src/openai-api.ts

diff --git a/src/extension.ts b/src/extension.ts
index 282cdf1..528907d 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -1,54 +1,9 @@
 import { ok } from 'assert';
 import * as vscode from 'vscode';
 import commentPrefix from './comments.json';
+import {createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint} from './llamacpp-api';
 
-// llama.cpp server response format
-type llamaData = {
-	content: string,
-	generation_settings: JSON,
-	model: string,
-	prompt: string,
-	stopped_eos: boolean,
-	stopped_limit: boolean,
-	stopped_word: boolean,
-	stopping_word: string,
-	timings: {
-		predicted_ms: number,
-		predicted_n: number,
-		predicted_per_second: number,
-		predicted_per_token_ms: number,
-		prompt_ms: number,
-		prompt_n: number,
-		prompt_per_second: number,
-		prompt_per_token_ms: number
-	},
-	tokens_cached: number,
-	tokens_evaluated: number,
-	tokens_predicted: number,
-	truncated: boolean
-};
-
-type llamaRequest = {
-	n_predict: number,
-	mirostat: number,
-	repeat_penalty: number,
-	frequency_penalty: number,
-	presence_penalty: number,
-	repeat_last_n: number,
-	temperature: number,
-	top_p: number,
-	top_k: number,
-	typical_p: number,
-	tfs_z: number,
-	seed: number,
-	stream: boolean,
-	cache_prompt: boolean,
-	prompt?: string,
-	input_prefix?: string,
-	input_suffix?: string
-};
-
 
 type fetchErrorCause = {
 	errno: number,
 	code: string,
@@ -172,51 +127,14 @@ export function activate(context: vscode.ExtensionContext) {
 	// FIXME: is there a more efficient way?
 	doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
 
-	const fim = config.get("fimEnabled") as boolean;
-	const fimRequest = config.get("useFillInMiddleRequest") as boolean;
-	let req_str: string;
-	let request: llamaRequest = {
-		n_predict: config.get("llamaMaxtokens") as number,
-		mirostat: config.get("llamaMirostat") as number,
-		repeat_penalty: config.get("llamaRepeatPenalty") as number,
-		frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
-		presence_penalty: config.get("llamaPresencePenalty,") as number,
-		repeat_last_n: config.get("llamaRepeatCtx,") as number,
-		temperature: config.get("llamaTemperature") as number,
-		top_p: config.get("llamaTop_p") as number,
-		top_k: config.get("llamaTop_k") as number,
-		typical_p: config.get("llamaTypical_p") as number,
-		tfs_z: config.get("llamaTailfree_z,") as number,
-		seed: config.get("llamaSeed") as number,
-		stream: false,
-		cache_prompt: config.get("llamaCachePrompt") as boolean
-	};
-
-	// check if fill in middle is enabled and fill the request prompt accordingly
-	if (fim === true) {
-		if (fimRequest === true) {
-			req_str = '/infill';
-			request.input_prefix = doc_before;
-			request.input_suffix = doc_after;
-		} else {
-			const fim_beg = config.get("fimBeginString") as string;
-			const fim_hole = config.get("fimHoleString") as string;
-			const fim_end = config.get("fimEndString") as string;
-			req_str = '/completion';
-			request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
-		}
-	} else {
-		req_str = '/completion';
-		request.prompt = doc_before;
-	}
-
+	const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
 	console.log(JSON.stringify(request));
 
 	let data: llamaData;
 	// try to send the request to the running server
 	try {
 		const response_promise = fetch(
-			(config.get("llamaHost") as string).concat(req_str),
+			llamacppRequestEndpoint(config),
 			{
 				method: 'POST',
 				headers: {
diff --git a/src/llamacpp-api.ts b/src/llamacpp-api.ts
new file mode 100644
index 0000000..bbc67e4
--- /dev/null
+++ b/src/llamacpp-api.ts
@@ -0,0 +1,103 @@
+import * as vscode from 'vscode';
+
+
+// llama.cpp server response format
+export type llamaData = {
+	content: string,
+	generation_settings: JSON,
+	model: string,
+	prompt: string,
+	stopped_eos: boolean,
+	stopped_limit: boolean,
+	stopped_word: boolean,
+	stopping_word: string,
+	timings: {
+		predicted_ms: number,
+		predicted_n: number,
+		predicted_per_second: number,
+		predicted_per_token_ms: number,
+		prompt_ms: number,
+		prompt_n: number,
+		prompt_per_second: number,
+		prompt_per_token_ms: number
+	},
+	tokens_cached: number,
+	tokens_evaluated: number,
+	tokens_predicted: number,
+	truncated: boolean
+};
+
+export type llamaRequest = {
+	n_predict: number,
+	mirostat: number,
+	repeat_penalty: number,
+	frequency_penalty: number,
+	presence_penalty: number,
+	repeat_last_n: number,
+	temperature: number,
+	top_p: number,
+	top_k: number,
+	typical_p: number,
+	tfs_z: number,
+	seed: number,
+	stream: boolean,
+	cache_prompt: boolean,
+	prompt?: string,
+	input_prefix?: string,
+	input_suffix?: string
+};
+
+
+export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest
+{
+	let request: llamaRequest = {
+		n_predict: config.get("llamaMaxtokens") as number,
+		mirostat: config.get("llamaMirostat") as number,
+		repeat_penalty: config.get("llamaRepeatPenalty") as number,
+		frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
+		presence_penalty: config.get("llamaPresencePenalty,") as number,
+		repeat_last_n: config.get("llamaRepeatCtx,") as number,
+		temperature: config.get("llamaTemperature") as number,
+		top_p: config.get("llamaTop_p") as number,
+		top_k: config.get("llamaTop_k") as number,
+		typical_p: config.get("llamaTypical_p") as number,
+		tfs_z: config.get("llamaTailfree_z,") as number,
+		seed: config.get("llamaSeed") as number,
+		stream: false,
+		cache_prompt: config.get("llamaCachePrompt") as boolean
+	};
+
+	const fim = config.get("fimEnabled") as boolean;
+	const fimRequest = config.get("useFillInMiddleRequest") as boolean;
+
+	if (fim === true) {
+		if (fimRequest === true) {
+			request.input_prefix = doc_before;
+			request.input_suffix = doc_after;
+		} else {
+			const fim_beg = config.get("fimBeginString") as string;
+			const fim_hole = config.get("fimHoleString") as string;
+			const fim_end = config.get("fimEndString") as string;
+			request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+		}
+	} else {
+		request.prompt = doc_before;
+	}
+
+	return request;
+}
+
+export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string
+{
+	const fim = config.get("fimEnabled") as boolean;
+	const fimRequest = config.get("useFillInMiddleRequest") as boolean;
+	let req_str: string = config.get("llamaHost") as string;
+
+	if (fim === true && fimRequest === true) {
+		req_str += '/infill';
+	} else {
+		req_str += '/completion';
+	}
+
+	return req_str;
+}
\ No newline at end of file
diff --git a/src/openai-api.ts b/src/openai-api.ts
new file mode 100644
index 0000000..e69de29
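
For orientation, the sketch below shows how the two new exports from src/llamacpp-api.ts are meant to be wired together, mirroring the rewritten call sites in extension.ts above. It is illustrative only and not part of the patch: the configuration section name, the helper function name, and the Content-Type header are assumptions; the rest follows the code introduced by this commit.

// Usage sketch (not part of the patch): how extension.ts is expected to call
// the new llamacpp-api module after this refactor.
import * as vscode from 'vscode';
import { createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint } from './llamacpp-api';

async function fetchCompletion(doc_before: string, doc_after: string): Promise<llamaData> {
	// placeholder section name; the real extension reads its own configuration section
	const config = vscode.workspace.getConfiguration('myLlamaExtension');

	// build the request body from the configuration and the surrounding document text,
	// and derive the /infill or /completion endpoint from the same settings
	const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
	const endpoint: string = llamacppRequestEndpoint(config);

	// POST the request to the llama.cpp server, as extension.ts does after this patch
	const response = await fetch(endpoint, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },	// assumed header
		body: JSON.stringify(request)
	});
	return await response.json() as llamaData;
}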