Compare commits
No commits in common. "04f8db150deae3a9cdd2c8940f9d59f96b58a359" and "00e22e83583381304a2c7ea3973d2787528b6abd" have entirely different histories.
04f8db150d ... 00e22e8358
.vscode/launch.json (vendored) · 2 changes

@@ -23,7 +23,7 @@
             "request": "launch",
             "args": [
                 "--extensionDevelopmentPath=${workspaceFolder}",
-                "--extensionTestsPath=${workspaceFolder}/out/test/"
+                "--extensionTestsPath=${workspaceFolder}/out/test/suite/index"
             ],
             "outFiles": [
                 "${workspaceFolder}/out/test/**/*.js"
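For orientation: the path that `--extensionTestsPath` now points at must be a module exporting a `run()` function, which the VS Code test host awaits, treating a rejected promise as a failed run. Below is a minimal sketch of such an `out/test/suite/index` entry point, assuming a Mocha-based suite; the glob pattern, Mocha options, and file layout are illustrative assumptions, not code from this repo.

// Hypothetical src/test/suite/index.ts (compiled to out/test/suite/index.js).
// The test host loads this module and calls run(); rejecting the returned
// promise is how failures are reported back to the runner.
import * as path from 'path';
import Mocha from 'mocha';
import { glob } from 'glob';

export async function run(): Promise<void> {
	const mocha = new Mocha({ ui: 'tdd', color: true });
	const testsRoot = path.resolve(__dirname, '.');

	// collect every compiled *.test.js file under the suite directory
	const files = await glob('**/*.test.js', { cwd: testsRoot });
	files.forEach((f) => mocha.addFile(path.resolve(testsRoot, f)));

	return new Promise((resolve, reject) => {
		mocha.run((failures) =>
			failures > 0 ? reject(new Error(`${failures} tests failed`)) : resolve()
		);
	});
}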
TODO.md · 10 changes

@@ -1,12 +1,6 @@
-[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
-[x] - add config option to disable the extension
+[] - add config option to disable the extension
 [] - add command to test and query connection to server
-[x] - add feedback when waiting response
-[] - add a chat window
-[] - if the model is an instruct-type add the system prompt to the chat
-[] - add an icon
-[] - option to backup and restore model settings
-[] - add a window to quickly modify model configs
 [] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
+[] - add feedback when waiting response
package.json · 49 changes

@@ -122,54 +122,7 @@
                 "when": "editorTextFocus",
                 "command": "editor.action.inlineSuggest.trigger"
             }
-        ],
-        "configuration": {
-            "title": "dumbpilot",
-            "properties": {
-                "dumbpilot.completionEnabled": {
-                    "type": "boolean",
-                    "default": true,
-                    "description": "Enable predictive code completion"
-                },
-                "dumbpilot.fimEnabled": {
-                    "type": "boolean",
-                    "default": false,
-                    "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
-                },
-                "dumbpilot.llamaHost": {
-                    "type": "string",
-                    "default": "http://0.0.0.0:8080",
-                    "description": "llama.cpp server address"
-                },
-                "dumbpilot.llamaCtxsize": {"type": "number", "default": 2048},
-                "dumbpilot.llamaMaxtokens": {"type": "number", "default": -1},
-                "dumbpilot.llamaMirostat": {"type": "number", "default": 0},
-                "dumbpilot.llamaRepeatPenalty": {"type": "number", "default": 1.11},
-                "dumbpilot.llamaFrequencyPenalty": {"type": "number", "default": 0.0},
-                "dumbpilot.llamaPresencePenalty": {"type": "number", "default": 0.0},
-                "dumbpilot.llamaRepeatCtx": {"type": "number", "default": 256},
-                "dumbpilot.llamaTemperature": {"type": "number", "default": 0.25},
-                "dumbpilot.llamaTop_p": {"type": "number", "default": 0.95},
-                "dumbpilot.llamaTop_k": {"type": "number", "default": 40},
-                "dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
-                "dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-                "dumbpilot.llamaSeed": {"type": "number", "default": -1},
-                "dumbpilot.llamaCachePrompt": {
-                    "type": "boolean",
-                    "default": true,
-                    "description": "Enable prompt caching for faster results"
-                },
-                "dumbpilot.llamaInstructModel": {
-                    "type": "boolean",
-                    "default": "false",
-                    "description": "For use with instruct models"
-                },
-                "dumbpilot.llamaSystemPrompt": {
-                    "type": "string",
-                    "description": "The system prompt that the model considers at the beginning of every request, used by instruct models"
-                }
-            }
-        }
+        ]
     },
     "scripts": {
         "vscode:prepublish": "npm run compile",
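The properties removed above are consumed on the extension side through `vscode.workspace.getConfiguration`. A small sketch of that lookup follows; the helper name and the defaults mirrored from the schema are assumptions for illustration, not code from this repo.

import * as vscode from 'vscode';

// Hypothetical helper: read the contributed dumbpilot.* settings declared
// in package.json; the fallback defaults mirror the schema above.
function readDumbpilotConfig() {
	const cfg = vscode.workspace.getConfiguration('dumbpilot');
	return {
		completionEnabled: cfg.get<boolean>('completionEnabled', true),
		fimEnabled: cfg.get<boolean>('fimEnabled', false),
		host: cfg.get<string>('llamaHost', 'http://0.0.0.0:8080'),
		temperature: cfg.get<number>('llamaTemperature', 0.25),
		cachePrompt: cfg.get<boolean>('llamaCachePrompt', true),
	};
}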
src/extension.ts · 161 changes

@@ -29,7 +29,7 @@ type llamaData = {
 	truncated: boolean
 };
 
-type llamaRequest = {
+type llamaCompletionRequest = {
 	n_predict: number,
 	mirostat: number,
 	repeat_penalty: number,
@@ -43,21 +43,45 @@ type llamaRequest = {
 	tfs_z: number,
 	seed: number,
 	stream: boolean,
-	cache_prompt: boolean,
-	prompt?: string,
-	input_prefix?: string,
-	input_suffix?: string
+	prompt: string,
 };
 
-type fetchErrorCause = {
-	errno: number,
-	code: string,
-	syscall: string,
-	address: string,
-	port: number
+type llamaFillRequest = {
+	n_predict: number,
+	mirostat: number,
+	repeat_penalty: number,
+	frequency_penalty: number,
+	presence_penalty: number,
+	repeat_last_n: number,
+	temperature: number,
+	top_p: number,
+	top_k: number,
+	typical_p: number,
+	tfs_z: number,
+	seed: number,
+	stream: boolean,
+	input_prefix: string,
+	input_suffix: string,
 };
 
+
+const llama_ctxsize = 2048;
+const llama_maxtokens = -1;
+const llama_mirostat = 0;
+const llama_repeat_penalty = 1.11;
+const llama_frequency_penalty = 0.0;
+const llama_presence_penalty = 0.0;
+const llama_repeat_ctx = 256;
+const llama_temperature = 0.25;
+const llama_top_p = 0.95;
+const llama_top_k = 40;
+const llama_typical_p = 0.95;
+const llama_tailfree_z = 0.5;
+const llama_session_seed = -1;
+
+const llama_host = "http://0.0.0.0:8080";
+
 
 // clean up the document
 function clean_text(txt: string): string {
 	// these are already done by JSON.stringify()
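To make the split between the two request types concrete: both carry the same sampling fields, and only the prompt-carrying fields differ, matching the `/completion` (whole prompt) and `/infill` (prefix plus suffix) endpoints used elsewhere in this diff. A sketch under that assumption, with `doc_before` and `doc_after` standing in for the text on either side of the cursor:

// Shared sampling parameters, taken from the constants above
const base = {
	n_predict: llama_maxtokens,
	mirostat: llama_mirostat,
	repeat_penalty: llama_repeat_penalty,
	frequency_penalty: llama_frequency_penalty,
	presence_penalty: llama_presence_penalty,
	repeat_last_n: llama_repeat_ctx,
	temperature: llama_temperature,
	top_p: llama_top_p,
	top_k: llama_top_k,
	typical_p: llama_typical_p,
	tfs_z: llama_tailfree_z,
	seed: llama_session_seed,
	stream: false,
};

// /completion wants everything up to the cursor as a single prompt...
const completion: llamaCompletionRequest = { ...base, prompt: doc_before };
// ...while /infill takes the text on both sides of the cursor separately
const fill: llamaFillRequest = { ...base, input_prefix: doc_before, input_suffix: doc_after };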
@@ -70,54 +94,22 @@ function clean_text(txt: string): string {
 }
 
 
-// Show a message notification with a set timeout
-async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Notification,
-			title: message,
-			cancellable: false,
-		}, (progress, token) => {
-			token.onCancellationRequested(() => {});
-
-			// This is magic I don't understand
-			const p = new Promise<void>((resolve) => {
-				setTimeout(resolve, timeout);
-			});
-			return p;
-		});
-};
-
-
-// show a message on the status bar until the promise is resolved
-async function showPendingStatusBar(message: string, operation: Promise<any>): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Window,
-			title: message,
-		}, () => operation ).then((aok) => {}, (err) => {});
-	// we already resolve the operation elsewhere
-}
-
-
 export function activate(context: vscode.ExtensionContext) {
 
 	console.log('dumbpilot is now active');
 
-	let config = vscode.workspace.getConfiguration("dumbpilot");
-
-	// handle completion changes
-	context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
-		config = vscode.workspace.getConfiguration("dumbpilot");
-	}));
+	const config = vscode.workspace.getConfiguration("dumbpilot");
+	var completion_enabled: boolean = config.get("completionEnabled") as boolean;
 
 	// TODO: work with local configurations
 	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
+		completion_enabled = true;
 		config.update("completionEnabled", true);
 	});
 	context.subscriptions.push(disposable);
 
 	disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
+		completion_enabled = false;
 		config.update("completionEnabled", false);
 	});
 
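One step in the removed `showMessageWithTimeout` helper deserves a note, since its own comment calls it magic: `withProgress` keeps the notification visible exactly until the promise returned from its callback settles, so resolving a `setTimeout` promise is what gives the message a fixed lifetime. A stripped-down sketch of the same trick; the helper name is hypothetical:

import * as vscode from 'vscode';

// Hypothetical helper: show a notification that dismisses itself after
// `ms` milliseconds, by resolving the promise that withProgress waits on.
function notifyFor(message: string, ms: number): void {
	void vscode.window.withProgress(
		{ location: vscode.ProgressLocation.Notification, title: message, cancellable: false },
		() => new Promise<void>((resolve) => setTimeout(resolve, ms))
	);
}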
@@ -127,12 +119,11 @@ export function activate(context: vscode.ExtensionContext) {
 	const provider: vscode.InlineCompletionItemProvider = {
 		async provideInlineCompletionItems(document, position, context, token) {
-
 			// disable if predictive completion is disabled
-			if (config.get("completionEnabled") as boolean === false) {
+			if (completion_enabled === false) {
 				return null;
 			}
 
-			// Since for every completion we will query the server, we want to filter out
+			// Since for every completion we want to query the server, we want to filter out
 			// automatic completion invokes
 			if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
 				return null;
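Both sides of this hunk assume the provider object is later registered for matching documents. A one-line sketch of what that registration looks like; the catch-all selector is an assumption for illustration:

// register the inline completion provider for all documents
context.subscriptions.push(
	vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, provider)
);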
@@ -153,8 +144,8 @@ export function activate(context: vscode.ExtensionContext) {
 			// Get the document's text and position to send to the model
 			const doc_text = document.getText();
 			const doc_off = document.offsetAt(position);
-			let doc_before = doc_text.substring(0, doc_off);
-			let doc_after = doc_text.substring(doc_off);
+			var doc_before = doc_text.substring(0, doc_off);
+			var doc_after = doc_text.substring(doc_off);
 
 			// make it cleaner in hope to reduce the number of tokens
 			doc_before = clean_text(doc_before);
@@ -163,48 +154,37 @@ export function activate(context: vscode.ExtensionContext) {
 			// TODO: prune text up to a maximum context length
 
 			// Prefix the filename in a comment
-			let pfx: string, sfx: string;
-			const fname = document.fileName.split('/').at(-1);
+			var pfx: string, sfx: string;
 			const lang = document.languageId;
 			const prefixes = commentPrefix;
 			pfx = (prefixes as any)[lang][0] as string;
 			sfx = (prefixes as any)[lang][1] as string;
 			// FIXME: is there a more efficient way?
-			doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
+			doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
 
-			const fim = config.get("fimEnabled") as boolean;
-			let req_str: string;
-			let request: llamaRequest = {
-				n_predict: config.get("llamaMaxtokens") as number,
-				mirostat: config.get("llamaMirostat") as number,
-				repeat_penalty: config.get("llamaRepeatPenalty") as number,
-				frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
-				presence_penalty: config.get("llamaPresencePenalty,") as number,
-				repeat_last_n: config.get("llamaRepeatCtx,") as number,
-				temperature: config.get("llamaTemperature") as number,
-				top_p: config.get("llamaTop_p") as number,
-				top_k: config.get("llamaTop_k") as number,
-				typical_p: config.get("llamaTypical_p") as number,
-				tfs_z: config.get("llamaTailfree_z,") as number,
-				seed: config.get("llamaSeed") as number,
+			// server request object
+			const request: llamaCompletionRequest = {
+				n_predict: llama_maxtokens,
+				mirostat: llama_mirostat,
+				repeat_penalty: llama_repeat_penalty,
+				frequency_penalty: llama_frequency_penalty,
+				presence_penalty: llama_presence_penalty,
+				repeat_last_n: llama_repeat_ctx,
+				temperature: llama_temperature,
+				top_p: llama_top_p,
+				top_k: llama_top_k,
+				typical_p: llama_typical_p,
+				tfs_z: llama_tailfree_z,
+				seed: llama_session_seed,
 				stream: false,
-				cache_prompt: config.get("llamaCachePrompt") as boolean
+				prompt: doc_before,
 			};
 
-			if (fim === true) {
-				req_str = '/infill';
-				request.input_prefix = doc_before;
-				request.input_suffix = doc_after;
-			} else {
-				req_str = '/completion';
-				request.prompt = doc_before;
-			}
-
-			let data: llamaData;
+			var data: llamaData;
 			// try to send the request to the running server
 			try {
-				const response_promise = fetch(
-					(config.get("llamaHost") as string).concat(req_str),
+				const response = await fetch(
+					llama_host.concat('/completion'),
 				{
 					method: 'POST',
 					headers: {
@@ -213,25 +193,14 @@ export function activate(context: vscode.ExtensionContext) {
 					body: JSON.stringify(request)
 				}
 			);
-
-			showPendingStatusBar("dumbpilot waiting", response_promise);
-			const response = await response_promise;
 			if (response.ok === false) {
 				throw new Error("llama server request is not ok??");
 			}
 
 			data = await response.json() as llamaData;
-			const gen_tokens = data.timings.predicted_n;
-			const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
-			showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500);
 
 			} catch (e: any) {
-				const err = e as TypeError;
-				const cause: fetchErrorCause = err.cause as fetchErrorCause;
-				const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
-				// let the user know something went wrong
-				// TODO: maybe add a retry action or something
-				showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
+				console.log('dumbpilot: ' + e.message);
 				return null;
 			};
 
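For context on the removed `fetchErrorCause` handling: when Node's built-in fetch cannot reach the server, it rejects with a `TypeError` whose `cause` carries the low-level socket details, which is what the removed formatting read from. A sketch of that narrowing; the exact fields available on `cause` vary by Node version, so treat this shape as an assumption:

try {
	await fetch('http://0.0.0.0:8080/completion', { method: 'POST' });
} catch (e: any) {
	// on connection failure, the socket error is attached as `cause`
	const cause = (e as TypeError).cause as { code?: string; address?: string; port?: number };
	console.log(`request failed: ${cause?.code} at ${cause?.address}:${cause?.port}`);
}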