@@ -3,10 +3,8 @@ import {
     FetchErrorCause,
     ResponseData,
     showMessageWithTimeout,
     showPendingStatusBar,
     updateStatusBarMessage,
 } from './common';
-import { config } from 'process';
-
 // oobabooga/text-generation-webui OpenAI compatible API
 // https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
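For orientation before the hunks that follow: createOpenAIAPIRequest assembles an OpenAI-style completion request from the extension's settings, openAIAPIRequestEndpoint points it at an OpenAI-compatible /v1/completions URL, and openAIMakeRequest sends it. The sketch below shows that round trip in isolation; the server address, the literal parameter values, and the trimmed-down request type are illustrative assumptions, not the extension's defaults.

type CompletionRequest = {
    prompt: string;
    max_tokens: number;
    temperature: number;
    stream: boolean;
};

async function exampleCompletion(): Promise<string> {
    const body: CompletionRequest = {
        prompt: 'def fibonacci(n):', // placeholder prompt
        max_tokens: 128,             // placeholder sampling parameters
        temperature: 0.7,
        stream: false,               // non-streaming: the reply arrives as a single JSON document
    };

    // Assumed local text-generation-webui instance; adjust host and port to your setup.
    const response = await fetch('http://127.0.0.1:5000/v1/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
    });

    const data = (await response.json()) as { choices: { text: string }[] };
    return data.choices[0].text; // the same field the extension reads from each streamed chunk
}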
@@ -98,19 +96,19 @@ export function createOpenAIAPIRequest(
 ): OpenAICompletionRequest {
     let request: OpenAICompletionRequest = {
         prompt: '',
-        max_tokens: config.get('llamaMaxtokens') as number,
-        mirostat_mode: config.get('llamaMirostat') as number,
-        repetition_penalty: config.get('llamaRepeatPenalty') as number,
-        frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
-        presence_penalty: config.get('llamaPresencePenalty,') as number,
-        repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
-        temperature: config.get('llamaTemperature') as number,
-        top_p: config.get('llamaTop_p') as number,
-        top_k: config.get('llamaTop_k') as number,
-        typical_p: config.get('llamaTypical_p') as number,
-        tfs: config.get('llamaTailfree_z,') as number,
-        seed: config.get('llamaSeed') as number,
-        stream: config.get('llamaAPIStream'),
+        max_tokens: config.get('parameters.MaxTokens') as number,
+        mirostat_mode: config.get('parameters.Mirostat') as number,
+        repetition_penalty: config.get('parameters.RepeatPenalty') as number,
+        frequency_penalty: config.get('parameters.FrequencyPenalty,') as number,
+        presence_penalty: config.get('parameters.PresencePenalty,') as number,
+        repetition_penalty_range: config.get('parameters.RepeatCtx,') as number,
+        temperature: config.get('parameters.Temperature') as number,
+        top_p: config.get('parameters.Top_p') as number,
+        top_k: config.get('parameters.Top_k') as number,
+        typical_p: config.get('parameters.Typical_p') as number,
+        tfs: config.get('parameters.Tailfree_z,') as number,
+        seed: config.get('parameters.Seed') as number,
+        stream: config.get('parameters.stream') as boolean,
     };
 
     const fim = config.get('fimEnabled') as boolean;
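The hunk above switches the setting keys the request is read from (the llama* names become parameters.*). As a hedged sketch of how such keys resolve in a VS Code extension: config here is a vscode.WorkspaceConfiguration, so config.get('parameters.MaxTokens') looks up a settings.json entry under the extension's configuration section. The section name below is a placeholder, not this extension's real identifier.

import * as vscode from 'vscode';

// Placeholder configuration section; the real extension defines its own namespace.
const config: vscode.WorkspaceConfiguration = vscode.workspace.getConfiguration('myExtension');

// Reads "myExtension.parameters.MaxTokens" from the user's settings,
// falling back to the supplied default when the key is unset.
const maxTokens: number = config.get<number>('parameters.MaxTokens', 512);
const streamEnabled: boolean = config.get<boolean>('parameters.stream', true);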
@@ -127,11 +125,13 @@ export function createOpenAIAPIRequest(
     return request;
 }
 
-// for now only completions is implemented
+// for now only v1/completions is implemented
+// TODO: implement chat
 export function openAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
-    return (config.get('llamaHost') as string) + '/v1/completions';
+    return (config.get('endpoint') as string) + '/v1/completions';
 }
 
+// make a request and parse the incoming data
 export async function openAIMakeRequest(
     request_body: OpenAICompletionRequest,
     endpoint: string
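The next two hunks rework how the streamed response is parsed: instead of decoding a whole network chunk and JSON-parsing it in one go, the new code treats each chunk as one or more 'data: {...}' records and parses them individually. Below is a small, self-contained sketch of that idea; the record format and field names are inferred from the hunks and from common OpenAI-style streaming servers, so treat them as assumptions.

type CompletionChunk = { choices: { text: string }[] };

// Split one decoded chunk into its 'data: {...}' records and collect the generated text.
function extractText(chunkText: string): string {
    let out = '';
    for (const record of chunkText.split('data: ')) {
        const payload = record.trim();
        // Skip empty fragments and the '[DONE]' marker some servers append.
        if (payload.length < 2 || payload === '[DONE]') {
            continue;
        }
        try {
            const data = JSON.parse(payload) as CompletionChunk;
            out += data.choices.map((choice) => choice.text).join('');
        } catch {
            // A record can be split across network chunks; a real parser would buffer the remainder.
        }
    }
    return out;
}

// extractText('data: {"choices":[{"text":"Hel"}]}\n\ndata: {"choices":[{"text":"lo"}]}\n\n') === 'Hello'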
@@ -169,18 +169,21 @@ export async function openAIMakeRequest(
         // start a timer
         const timer_start = performance.now();
 
+        let chunk_number: number = 1;
         for await (const chunk of response.body) {
-            // FIXME: why the fuck do I have to do this shite
-            let data_text = new TextDecoder().decode(chunk);
-            data_text = data_text.substring(data_text.indexOf('{'));
-            let data: OpenAICompletionResponse;
-            try {
-                data = JSON.parse(data_text);
-            } catch (e: any) {
-                console.error(e);
-                return ret;
-            }
+            // each chunk of data is a complete response in the form of a uint8 array
+            const data_text = Buffer.from(chunk as Uint8Array).toString();
+
+            // each response chunk contains one or more data chunks, which in turn are just json data
+            const data_chunks = data_text.split('data: ');
+            let data: OpenAICompletionResponse;
+            for (const data_string of data_chunks) {
+                data_string.trim();
+                if (data_string.length < 2) {
+                    continue;
+                }
+
+                data = JSON.parse(data_string);
                 //console.log(JSON.stringify(data));
 
                 if (Object.hasOwn(data, 'detail') === true) {
@@ -188,22 +191,23 @@ export async function openAIMakeRequest(
                     // TODO: why did it error?
                     throw new Error('OpenAI Endpoint Error');
                 }
 
                 // unpack the data
                 data = data as OpenAICompletionSuccessResponse;
-                // FIXME: why the choices may be multiple?
-                // TODO: display the multiple choices
-                //console.log(data.choices[0].text);
-                updateStatusBarMessage(data.choices[0].text);
-                ret.content += data.choices[0].text;
+                for (const choice of data.choices) {
+                    ret.content += choice.text;
+                    updateStatusBarMessage(chunk_number, choice.text);
+                    chunk_number++;
+                }
                 ret.tokens += data.usage?.completion_tokens || 0;
             }
+        }
 
         // stop the timer
         const timer_end = performance.now();
         ret.time = (timer_end - timer_start) / 1000.0;
         // clear the status bar item
-        updateStatusBarMessage('');
+        updateStatusBarMessage(0, '');
     } catch (e: any) {
         console.error(e);
         const err = e as TypeError;