Alessandro Mauri 12 months ago
parent 6bb3add1be
commit a599d44c10
1. package.json (7 changes)
2. src/extension.ts (54 changes)

@@ -153,7 +153,12 @@
 "dumbpilot.llamaTop_k": {"type": "number", "default": 40},
 "dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
 "dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-"dumbpilot.llamaSeed": {"type": "number", "default": -1}
+"dumbpilot.llamaSeed": {"type": "number", "default": -1},
+"dumbpilot.llamaCachePrompt": {
+	"type": "boolean",
+	"default": true,
+	"description": "Enable prompt caching for faster results"
+}
 }
 }
 },
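On the extension side, this contribution point is read like any other dumbpilot setting. A minimal standalone sketch of pulling it out of the workspace configuration (not code from this commit):

import * as vscode from 'vscode';

// Reads dumbpilot.llamaCachePrompt; falls back to the declared default of true.
const config = vscode.workspace.getConfiguration('dumbpilot');
const cachePrompt = config.get<boolean>('llamaCachePrompt', true);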

@@ -29,7 +29,7 @@ type llamaData = {
 	truncated: boolean
 };
 
-type llamaCompletionRequest = {
+type llamaRequest = {
 	n_predict: number,
 	mirostat: number,
 	repeat_penalty: number,
@@ -43,25 +43,10 @@ type llamaCompletionRequest = {
 	tfs_z: number,
 	seed: number,
 	stream: boolean,
-	prompt: string,
-};
-
-type llamaFillRequest = {
-	n_predict: number,
-	mirostat: number,
-	repeat_penalty: number,
-	frequency_penalty: number,
-	presence_penalty: number,
-	repeat_last_n: number,
-	temperature: number,
-	top_p: number,
-	top_k: number,
-	typical_p: number,
-	tfs_z: number,
-	seed: number,
-	stream: boolean,
-	input_prefix: string,
-	input_suffix: string
+	cache_prompt: boolean,
+	prompt?: string,
+	input_prefix?: string,
+	input_suffix?: string
 };
 
 type fetchErrorCause = {
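Collapsing llamaCompletionRequest and llamaFillRequest into one llamaRequest trades two near-identical shapes for optional fields: prompt is set for plain completion, input_prefix/input_suffix for fill-in-the-middle, and cache_prompt is now present on both. A sketch of the two shapes (sampling values here are illustrative placeholders):

// Shared sampling parameters; values are placeholders only.
const base = {
	n_predict: 128, mirostat: 0, repeat_penalty: 1.1,
	frequency_penalty: 0, presence_penalty: 0, repeat_last_n: 64,
	temperature: 0.7, top_p: 0.95, top_k: 40, typical_p: 0.95,
	tfs_z: 0.5, seed: -1, stream: false, cache_prompt: true,
};

// Plain completion: only `prompt` is set.
const completion: llamaRequest = { ...base, prompt: 'int main() {' };

// Fill-in-the-middle: prefix and suffix are set instead.
const infill: llamaRequest = { ...base, input_prefix: 'int main() {', input_suffix: '}' };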
@@ -118,7 +103,13 @@ export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
 
-	const config = vscode.workspace.getConfiguration("dumbpilot");
+	let config = vscode.workspace.getConfiguration("dumbpilot");
+
+	// handle configuration changes
+	context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
+		config = vscode.workspace.getConfiguration("dumbpilot");
+	}));
 
 	let completion_enabled: boolean = config.get("completionEnabled") as boolean;
 
 	// TODO: work with local configurations
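Re-reading the whole configuration on every change event works, but the event fires for unrelated settings too; the event argument can narrow it. A sketch with an affectsConfiguration guard (same vscode API as the commit, the guard is the only addition):

context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
	// Only reload when a dumbpilot.* setting actually changed.
	if (e.affectsConfiguration('dumbpilot')) {
		config = vscode.workspace.getConfiguration('dumbpilot');
	}
}));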
@@ -183,8 +174,9 @@ export function activate(context: vscode.ExtensionContext) {
 	// FIXME: is there a more efficient way?
 	doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
 
-	// server request object
-	const request: llamaCompletionRequest = {
+	const fim = config.get("fimEnabled") as boolean;
+	let req_str: string;
+	let request: llamaRequest = {
 		n_predict: config.get("llamaMaxtokens") as number,
 		mirostat: config.get("llamaMirostat") as number,
 		repeat_penalty: config.get("llamaRepeatPenalty") as number,
@ -198,14 +190,24 @@ export function activate(context: vscode.ExtensionContext) {
tfs_z: config.get("llamaTailfree_z,") as number, tfs_z: config.get("llamaTailfree_z,") as number,
seed: config.get("llamaSeed") as number, seed: config.get("llamaSeed") as number,
stream: false, stream: false,
prompt: doc_before, cache_prompt: config.get("llamaCachePrompt") as boolean
}; };
if (fim === true) {
req_str = '/infill';
request.input_prefix = doc_before;
request.input_suffix = doc_after;
} else {
req_str = '/completion';
request.prompt = doc_before;
}
console.log(fim);
let data: llamaData; let data: llamaData;
// try to send the request to the running server // try to send the request to the running server
try { try {
const response_promise = fetch( const response_promise = fetch(
(config.get("llamaHost") as string).concat('/completion'), (config.get("llamaHost") as string).concat(req_str),
{ {
method: 'POST', method: 'POST',
headers: { headers: {
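Taken together, the endpoint and payload are now chosen per request: /infill with input_prefix/input_suffix when fimEnabled is set, /completion with prompt otherwise. A standalone sketch of that round trip (assumes a llama.cpp server listening on http://127.0.0.1:8080 that returns the generated text in a `content` field; error handling omitted):

async function complete(before: string, after: string, fim: boolean): Promise<string> {
	const endpoint = fim ? '/infill' : '/completion';
	// Infill sends prefix/suffix; plain completion sends the prompt.
	const body = fim
		? { n_predict: 128, stream: false, cache_prompt: true, input_prefix: before, input_suffix: after }
		: { n_predict: 128, stream: false, cache_prompt: true, prompt: before };
	const response = await fetch('http://127.0.0.1:8080' + endpoint, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(body),
	});
	const data = await response.json() as { content: string };
	return data.content;
}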
