separate llama.cpp server api into a different source file
parent 0a493294cf
commit 960b2190bf
TODO.md (2 changes)

@@ -1,4 +1,4 @@
-[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
+[x] - in extensions.json add suffix for languages that require it such as css where comments are: /_ stuff _/
 [] - test cancel token
 [] - add fill in middle
 [x] - add config option to disable the extension
package.json (17 changes)

@@ -223,6 +223,16 @@
             "dumbpilot.llamaSystemPrompt": {
                 "type": "string",
                 "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
+            },
+            "dumbpilot.llamaUseOpenAIAPI": {
+                "type": "boolean",
+                "default": true,
+                "description": "Use the OpenAI API to make requests to the server instead of the llama.cpp server API"
+            },
+            "dumbpilot.llamaModelName": {
+                "type": "string",
+                "default": "deepseek-coder-6.7B-base.gguf",
+                "description": "Name of the model to use, only works in OpenAI API mode"
             }
         }
     }
@@ -246,5 +256,12 @@
         "mocha": "^10.2.0",
         "typescript": "^5.2.2",
         "@vscode/test-electron": "^2.3.6"
+    },
+    "prettier": {
+        "tabWidth": 4,
+        "printWidth": 100,
+        "useTabs": true,
+        "singleQuote": true,
+        "trailingComma": "es5"
     }
 }
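The two new settings follow the existing dumbpilot.* pattern, so extension code can read them through the same WorkspaceConfiguration object it already uses. A minimal sketch (the variable names are illustrative, not part of the commit):

import * as vscode from 'vscode';

// Illustrative only: read the options added above, the same way the other
// dumbpilot settings are read elsewhere in this commit.
const config = vscode.workspace.getConfiguration('dumbpilot');
const useOpenAIAPI = config.get('llamaUseOpenAIAPI') as boolean; // default: true
const modelName = config.get('llamaModelName') as string; // default: 'deepseek-coder-6.7B-base.gguf'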
src/common.ts (new file, 58 lines)

@@ -0,0 +1,58 @@
import * as vscode from 'vscode';

// common data structures and functions

export type FetchErrorCause = {
    errno: number;
    code: string;
    syscall: string;
    address: string;
    port: number;
};

// a summary of the received data
export type ResponseData = {
    content: string;
    tokens: number;
    time: number;
};

// Show a message notification with a set timeout
export async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
    void vscode.window.withProgress(
        {
            location: vscode.ProgressLocation.Notification,
            title: message,
            cancellable: false,
        },
        (progress, token) => {
            token.onCancellationRequested(() => {});

            // This is magic I don't understand
            const p = new Promise<void>((resolve) => {
                setTimeout(resolve, timeout);
            });
            return p;
        }
    );
}

// show a message on the status bar until the promise is resolved
export async function showPendingStatusBar(
    message: string,
    operation: Promise<any>
): Promise<void> {
    void vscode.window
        .withProgress(
            {
                location: vscode.ProgressLocation.Window,
                title: message,
            },
            () => operation
        )
        .then(
            (aok) => {},
            (err) => {}
        );
    // we already resolve the operation elsewhere
}
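Both helpers wrap vscode.window.withProgress. A short usage sketch, assuming a caller inside the extension (the example function and its fake workload are hypothetical):

import { showMessageWithTimeout, showPendingStatusBar } from './common';

// Hypothetical caller: show a status-bar spinner while some work is pending,
// then flash a notification for 1.5 seconds once it is done.
async function exampleNotify(): Promise<void> {
    const work = new Promise<void>((resolve) => setTimeout(resolve, 2000)); // stand-in for a server request
    showPendingStatusBar('dumbpilot waiting', work);
    await work;
    showMessageWithTimeout('done', 1500);
}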
src/extension.ts (129 changes)

@@ -1,17 +1,19 @@
 import { ok } from 'assert';
 import * as vscode from 'vscode';
 import commentPrefix from './comments.json';
-import {createLlamacppRequest, llamaData, llamaRequest, llamacppRequestEndpoint} from './llamacpp-api';
-
-type fetchErrorCause = {
-    errno: number,
-    code: string,
-    syscall: string,
-    address: string,
-    port: number
-};
-
-
+import {
+    LlamaData,
+    LlamaRequest,
+    createLlamacppRequest,
+    llamacppRequestEndpoint,
+    llamacppMakeRequest,
+} from './llamacpp-api';
+import {
+    FetchErrorCause,
+    ResponseData,
+    showMessageWithTimeout,
+    showPendingStatusBar,
+} from './common';
 
 // clean up the document
 function clean_text(txt: string): string {
@@ -24,56 +26,26 @@ function clean_text(txt: string): string {
     return txt;
 }
 
-
-// Show a message notification with a set timeout
-async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
-    void vscode.window.withProgress(
-        {
-            location: vscode.ProgressLocation.Notification,
-            title: message,
-            cancellable: false,
-        }, (progress, token) => {
-            token.onCancellationRequested(() => {});
-
-            // This is magic I don't understand
-            const p = new Promise<void>((resolve) => {
-                setTimeout(resolve, timeout);
-            });
-            return p;
-    });
-};
-
-
-// show a message on the status bar until the promise is resolved
-async function showPendingStatusBar(message: string, operation: Promise<any>): Promise<void> {
-    void vscode.window.withProgress(
-        {
-            location: vscode.ProgressLocation.Window,
-            title: message,
-        }, () => operation ).then((aok) => {}, (err) => {});
-    // we already resolve the operation elsewhere
-}
-
-
 export function activate(context: vscode.ExtensionContext) {
 
     console.log('dumbpilot is now active');
 
-    let config = vscode.workspace.getConfiguration("dumbpilot");
+    let config = vscode.workspace.getConfiguration('dumbpilot');
 
     // handle completion changes
-    context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
-        config = vscode.workspace.getConfiguration("dumbpilot");
-    }));
+    context.subscriptions.push(
+        vscode.workspace.onDidChangeConfiguration((e) => {
+            config = vscode.workspace.getConfiguration('dumbpilot');
+        })
+    );
 
     // TODO: work with local configurations
-    let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
-        config.update("completionEnabled", true);
+    let disposable = vscode.commands.registerCommand('dumbpilot.enableCompletion', () => {
+        config.update('completionEnabled', true);
     });
     context.subscriptions.push(disposable);
 
-    disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
-        config.update("completionEnabled", false);
+    disposable = vscode.commands.registerCommand('dumbpilot.disableCompletion', () => {
+        config.update('completionEnabled', false);
     });
 
     // Register a new provider of inline completions, this does not decide how it is invoked
@@ -81,9 +53,8 @@ export function activate(context: vscode.ExtensionContext) {
     // https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
     const provider: vscode.InlineCompletionItemProvider = {
         async provideInlineCompletionItems(document, position, context, token) {
-
             // disable if predictive completion is disabled
-            if (config.get("completionEnabled") as boolean === false) {
+            if ((config.get('completionEnabled') as boolean) === false) {
                 return null;
             }
 
@@ -95,14 +66,14 @@ export function activate(context: vscode.ExtensionContext) {
 
             // FIXME: I don't know if this works
             token.onCancellationRequested(() => {
-                console.log("dumbpilot: operation cancelled, may still be running on the server");
+                console.log('dumbpilot: operation cancelled, may still be running on the server');
                 return null;
             });
 
             //console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);
 
             const result: vscode.InlineCompletionList = {
-                items: []
+                items: [],
             };
 
             // Get the document's text and position to send to the model
@@ -127,49 +98,19 @@ export function activate(context: vscode.ExtensionContext) {
             // FIXME: is there a more efficient way?
             doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
 
-            const request: llamaRequest = createLlamacppRequest(config, doc_before, doc_after);
-            console.log(JSON.stringify(request));
+            // actially make the request
+            const request: LlamaRequest = createLlamacppRequest(config, doc_before, doc_after);
+            const endpoint: string = llamacppRequestEndpoint(config);
+            let data: ResponseData = await llamacppMakeRequest(request, endpoint);
 
-            let data: llamaData;
-            // try to send the request to the running server
-            try {
-                const response_promise = fetch(
-                    llamacppRequestEndpoint(config),
-                    {
-                        method: 'POST',
-                        headers: {
-                            'content-type': 'application/json; charset=UTF-8'
-                        },
-                        body: JSON.stringify(request)
-                    }
-                );
-
-                showPendingStatusBar("dumbpilot waiting", response_promise);
-                const response = await response_promise;
-                if (response.ok === false) {
-                    throw new Error("llama server request is not ok??");
-                }
-
-                data = await response.json() as llamaData;
-                const gen_tokens = data.timings.predicted_n;
-                const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
-                showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500);
-
-            } catch (e: any) {
-                const err = e as TypeError;
-                const cause: fetchErrorCause = err.cause as fetchErrorCause;
-                const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
-                // let the user know something went wrong
-                // TODO: maybe add a retry action or something
-                showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
-                return null;
-            };
-
-            result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
+            result.items.push({
+                insertText: data.content,
+                range: new vscode.Range(position, position),
+            });
 
             return result;
         },
     };
-    vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider);
+    vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, provider);
 }
 
 // This method is called when your extension is deactivated
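Note that the provider above still calls the llama.cpp module unconditionally; the llamaUseOpenAIAPI setting is only declared in package.json in this commit. Purely as an illustration of how the two request builders could later be selected (none of this branching exists here, and the OpenAI module's import path is assumed):

import * as vscode from 'vscode';
import { createLlamacppRequest } from './llamacpp-api';
import { createOpenAIAPIRequest } from './openai-api'; // assumed path, the new file's name is not shown on this page

// Hypothetical follow-up, not part of this commit: pick a request builder per configuration.
function buildCompletionRequest(doc_before: string, doc_after: string) {
    const config = vscode.workspace.getConfiguration('dumbpilot');
    const useOpenAI = config.get('llamaUseOpenAIAPI') as boolean;
    return useOpenAI
        ? createOpenAIAPIRequest(config, doc_before, doc_after)
        : createLlamacppRequest(config, doc_before, doc_after);
}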
src/llamacpp-api.ts

@@ -1,83 +1,91 @@
 import * as vscode from 'vscode';
+import './common';
+import {
+    FetchErrorCause,
+    ResponseData,
+    showMessageWithTimeout,
+    showPendingStatusBar,
+} from './common';
 
 // llama.cpp server response format
-export type llamaData = {
-    content: string,
-    generation_settings: JSON,
-    model: string,
-    prompt: string,
-    stopped_eos: boolean,
-    stopped_limit: boolean,
-    stopped_word: boolean,
-    stopping_word: string,
-    timings: {
-        predicted_ms: number,
-        predicted_n: number,
-        predicted_per_second: number,
-        predicted_per_token_ms: number,
-        prompt_ms: number,
-        prompt_n: number,
-        prompt_per_second: number,
-        prompt_per_token_ms: number
-    },
-    tokens_cached: number,
-    tokens_evaluated: number,
-    tokens_predicted: number,
-    truncated: boolean
-};
+export type LlamaData = {
+    content: string;
+    generation_settings: JSON;
+    model: string;
+    prompt: string;
+    stopped_eos: boolean;
+    stopped_limit: boolean;
+    stopped_word: boolean;
+    stopping_word: string;
+    timings: {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number;
+        predicted_per_token_ms: number;
+        prompt_ms: number;
+        prompt_n: number;
+        prompt_per_second: number;
+        prompt_per_token_ms: number;
+    };
+    tokens_cached: number;
+    tokens_evaluated: number;
+    tokens_predicted: number;
+    truncated: boolean;
+};
 
-export type llamaRequest = {
-    n_predict: number,
-    mirostat: number,
-    repeat_penalty: number,
-    frequency_penalty: number,
-    presence_penalty: number,
-    repeat_last_n: number,
-    temperature: number,
-    top_p: number,
-    top_k: number,
-    typical_p: number,
-    tfs_z: number,
-    seed: number,
-    stream: boolean,
-    cache_prompt: boolean,
-    prompt?: string,
-    input_prefix?: string,
-    input_suffix?: string
-};
+export type LlamaRequest = {
+    n_predict: number;
+    mirostat: number;
+    repeat_penalty: number;
+    frequency_penalty: number;
+    presence_penalty: number;
+    repeat_last_n: number;
+    temperature: number;
+    top_p: number;
+    top_k: number;
+    typical_p: number;
+    tfs_z: number;
+    seed: number;
+    stream: boolean;
+    cache_prompt: boolean;
+    prompt?: string;
+    input_prefix?: string;
+    input_suffix?: string;
+};
 
-export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest
-{
-    let request: llamaRequest = {
-        n_predict: config.get("llamaMaxtokens") as number,
-        mirostat: config.get("llamaMirostat") as number,
-        repeat_penalty: config.get("llamaRepeatPenalty") as number,
-        frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
-        presence_penalty: config.get("llamaPresencePenalty,") as number,
-        repeat_last_n: config.get("llamaRepeatCtx,") as number,
-        temperature: config.get("llamaTemperature") as number,
-        top_p: config.get("llamaTop_p") as number,
-        top_k: config.get("llamaTop_k") as number,
-        typical_p: config.get("llamaTypical_p") as number,
-        tfs_z: config.get("llamaTailfree_z,") as number,
-        seed: config.get("llamaSeed") as number,
+export function createLlamacppRequest(
+    config: vscode.WorkspaceConfiguration,
+    doc_before: string,
+    doc_after: string
+): LlamaRequest {
+    let request: LlamaRequest = {
+        n_predict: config.get('llamaMaxtokens') as number,
+        mirostat: config.get('llamaMirostat') as number,
+        repeat_penalty: config.get('llamaRepeatPenalty') as number,
+        frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
+        presence_penalty: config.get('llamaPresencePenalty,') as number,
+        repeat_last_n: config.get('llamaRepeatCtx,') as number,
+        temperature: config.get('llamaTemperature') as number,
+        top_p: config.get('llamaTop_p') as number,
+        top_k: config.get('llamaTop_k') as number,
+        typical_p: config.get('llamaTypical_p') as number,
+        tfs_z: config.get('llamaTailfree_z,') as number,
+        seed: config.get('llamaSeed') as number,
         stream: false,
-        cache_prompt: config.get("llamaCachePrompt") as boolean
+        cache_prompt: config.get('llamaCachePrompt') as boolean,
     };
 
-    const fim = config.get("fimEnabled") as boolean;
-    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
+    const fim = config.get('fimEnabled') as boolean;
+    const fimRequest = config.get('useFillInMiddleRequest') as boolean;
 
     if (fim === true) {
         if (fimRequest === true) {
             request.input_prefix = doc_before;
             request.input_suffix = doc_after;
         } else {
-            const fim_beg = config.get("fimBeginString") as string;
-            const fim_hole = config.get("fimHoleString") as string;
-            const fim_end = config.get("fimEndString") as string;
+            const fim_beg = config.get('fimBeginString') as string;
+            const fim_hole = config.get('fimHoleString') as string;
+            const fim_end = config.get('fimEndString') as string;
             request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
         }
     } else {
@@ -87,11 +95,10 @@ export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc
     return request;
 }
 
-export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string
-{
-    const fim = config.get("fimEnabled") as boolean;
-    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
-    let req_str: string = config.get("llamaHost") as string;
+export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
+    const fim = config.get('fimEnabled') as boolean;
+    const fimRequest = config.get('useFillInMiddleRequest') as boolean;
+    let req_str: string = config.get('llamaHost') as string;
 
     if (fim === true && fimRequest === true) {
         req_str += '/infill';
@@ -101,3 +108,49 @@ export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration):
 
     return req_str;
 }
+
+export async function llamacppMakeRequest(
+    request: LlamaRequest,
+    endpoint: string
+): Promise<ResponseData> {
+    let ret: ResponseData = {
+        content: '',
+        tokens: 0,
+        time: 0,
+    };
+    let data: LlamaData;
+    // try to send the request to the running server
+    try {
+        const response_promise = fetch(endpoint, {
+            method: 'POST',
+            headers: {
+                'content-type': 'application/json; charset=UTF-8',
+            },
+            body: JSON.stringify(request),
+        });
+
+        showPendingStatusBar('dumbpilot waiting', response_promise);
+        const response = await response_promise;
+        if (response.ok === false) {
+            throw new Error('llama server request is not ok??');
+        }
+
+        data = (await response.json()) as LlamaData;
+        const gen_tokens = data.timings.predicted_n;
+        const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
+        showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500);
+
+        ret.content = data.content;
+        ret.tokens = data.tokens_predicted;
+        ret.time = data.timings.predicted_ms / 1000;
+    } catch (e: any) {
+        const err = e as TypeError;
+        const cause: FetchErrorCause = err.cause as FetchErrorCause;
+        const estr: string =
+            err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
+        // let the user know something went wrong
+        // TODO: maybe add a retry action or something
+        showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
+    }
+    return ret;
+}
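For orientation, this is how the module is meant to be driven end to end; the new provider code in src/extension.ts above does exactly this, and only the wrapper function here is hypothetical:

import * as vscode from 'vscode';
import {
    LlamaRequest,
    createLlamacppRequest,
    llamacppRequestEndpoint,
    llamacppMakeRequest,
} from './llamacpp-api';
import { ResponseData } from './common';

// Hypothetical wrapper: build the request from configuration, resolve the endpoint,
// POST it, and return the text of the summarized response.
async function completeWithLlamacpp(doc_before: string, doc_after: string): Promise<string> {
    const config = vscode.workspace.getConfiguration('dumbpilot');
    const request: LlamaRequest = createLlamacppRequest(config, doc_before, doc_after);
    const endpoint: string = llamacppRequestEndpoint(config);
    const data: ResponseData = await llamacppMakeRequest(request, endpoint);
    return data.content;
}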
new file (116 lines)

@@ -0,0 +1,116 @@
import * as vscode from 'vscode';

// oogabooga/text-generation-webui OpenAI compatible API
// https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API

type OpenAICompletionRequest = {
    model?: string; // automatic
    prompt: string;
    best_of?: number; // 1
    echo?: boolean; // false
    frequency_penalty?: number; // null
    logit_bias?: object; // null
    logprobs?: number; // 0
    max_tokens?: number; // 16
    n?: number; // 1
    presence_penalty?: number; // 0
    stop?: string;
    stream?: boolean; // false
    suffix?: string;
    temperature?: number; // 1
    top_p?: number; // 1
    user?: string;
    preset?: string;
    min_p?: number; // 1
    top_k?: number; // 1
    repetition_penalty?: number; // 1
    repetition_penalty_range?: number; // 1024
    typical_p?: number; // 1
    tfs?: number; // 1
    top_a?: number; // 0
    epsilon_cutoff?: number; // 0
    eta_cutoff?: number; // 0
    guidance_scale?: number; // 1
    negative_prompt?: string; // ""
    penalty_alpha?: number; // 0
    mirostat_mode?: number; // 0
    mirostat_tau?: number; // 5
    mirostat_eta?: number; // 0.1
    temperature_last?: boolean; // false
    do_sample?: boolean; // true
    seed?: number; // -1
    encoder_repetition_penalty?: number; // 1
    no_repeat_ngram_size?: number; // 0
    min_length?: number; // 0
    num_beams?: number; // 1
    length_penalty?: number; // 1
    early_stopping?: boolean; // false
    truncation_length?: number; // 0
    max_tokens_second?: number; // 0
    custom_token_bans?: string; // ""
    auto_max_new_tokens?: boolean; // false
    ban_eos_token?: boolean; // false
    add_bos_token?: boolean; // true
    skip_special_tokens?: boolean; // true
    grammar_string?: string; // ''
};

type OpenAICompletionSuccessResponse = {
    id: string;
    choices: object[];
    created?: number;
    model: string;
    object?: string;
    usage: object;
};

type OpenAICompletionFailureResponse = {
    detail: {
        loc: (string | number)[];
        msg: string;
        type: string;
    }[];
};

type OpenAICompletionResponse = OpenAICompletionSuccessResponse | OpenAICompletionFailureResponse;

export function createOpenAIAPIRequest(
    config: vscode.WorkspaceConfiguration,
    doc_before: string,
    doc_after: string
): OpenAICompletionRequest {
    let request: OpenAICompletionRequest = {
        prompt: '',
        max_tokens: config.get('llamaMaxtokens') as number,
        mirostat_mode: config.get('llamaMirostat') as number,
        repetition_penalty: config.get('llamaRepeatPenalty') as number,
        frequency_penalty: config.get('llamaFrequencyPenalty,') as number,
        presence_penalty: config.get('llamaPresencePenalty,') as number,
        repetition_penalty_range: config.get('llamaRepeatCtx,') as number,
        temperature: config.get('llamaTemperature') as number,
        top_p: config.get('llamaTop_p') as number,
        top_k: config.get('llamaTop_k') as number,
        typical_p: config.get('llamaTypical_p') as number,
        tfs: config.get('llamaTailfree_z,') as number,
        seed: config.get('llamaSeed') as number,
        stream: false,
    };

    const fim = config.get('fimEnabled') as boolean;

    if (fim === true) {
        const fim_beg = config.get('fimBeginString') as string;
        const fim_hole = config.get('fimHoleString') as string;
        const fim_end = config.get('fimEndString') as string;
        request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
    } else {
        request.prompt = doc_before;
    }

    return request;
}

// for now only completions is implemented
export function OpenAIAPIRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
    return '/v1/completions';
}
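This module only builds the request body and exposes the completions path; actually sending the request is not wired up in this commit. A sketch of how a caller might do it, assuming the import path and reusing the llamaHost setting and the fetch pattern from llamacppMakeRequest above:

import * as vscode from 'vscode';
import { createOpenAIAPIRequest, OpenAIAPIRequestEndpoint } from './openai-api'; // assumed path

// Hypothetical sketch: POST the OpenAI-style completion request. The URL
// construction (llamaHost + '/v1/completions') is an assumption, not in the commit.
async function openAIComplete(doc_before: string, doc_after: string): Promise<unknown> {
    const config = vscode.workspace.getConfiguration('dumbpilot');
    const body = createOpenAIAPIRequest(config, doc_before, doc_after);
    const url = (config.get('llamaHost') as string) + OpenAIAPIRequestEndpoint(config);
    const response = await fetch(url, {
        method: 'POST',
        headers: { 'content-type': 'application/json; charset=UTF-8' },
        body: JSON.stringify(body),
    });
    return response.json(); // shaped like OpenAICompletionResponse above
}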