diff --git a/TODO.md b/TODO.md
index 73164f5..f4b3899 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,4 +1,4 @@
-[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */ 
+[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
 [x] - add config option to disable the extension
@@ -9,4 +9,5 @@
 [] - add an icon
 [] - option to backup and restore model settings
 [] - add a window to quickly modify model configs
-[] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
\ No newline at end of file
+[] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
+[] - when trying to use completion when there is an active selection either substitute the selection or use the selection as context instead of the whole file
\ No newline at end of file
diff --git a/package.json b/package.json
index e400124..09b0419 100644
--- a/package.json
+++ b/package.json
@@ -137,23 +137,79 @@
             "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
         },
         "dumbpilot.llamaHost": {
-            "type": "string", 
+            "type": "string",
             "default": "http://0.0.0.0:8080",
             "description": "llama.cpp server address"
         },
-        "dumbpilot.llamaCtxsize": {"type": "number", "default": 2048},
-        "dumbpilot.llamaMaxtokens": {"type": "number", "default": -1},
-        "dumbpilot.llamaMirostat": {"type": "number", "default": 0},
-        "dumbpilot.llamaRepeatPenalty": {"type": "number", "default": 1.11},
-        "dumbpilot.llamaFrequencyPenalty": {"type": "number", "default": 0.0},
-        "dumbpilot.llamaPresencePenalty": {"type": "number", "default": 0.0},
-        "dumbpilot.llamaRepeatCtx": {"type": "number", "default": 256},
-        "dumbpilot.llamaTemperature": {"type": "number", "default": 0.25},
-        "dumbpilot.llamaTop_p": {"type": "number", "default": 0.95},
-        "dumbpilot.llamaTop_k": {"type": "number", "default": 40},
-        "dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
-        "dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-        "dumbpilot.llamaSeed": {"type": "number", "default": -1},
+        "dumbpilot.llamaCtxsize": {
+            "type": "number",
+            "default": 2048
+        },
+        "dumbpilot.llamaMaxtokens": {
+            "type": "number",
+            "default": -1
+        },
+        "dumbpilot.llamaMirostat": {
+            "type": "number",
+            "default": 0
+        },
+        "dumbpilot.llamaRepeatPenalty": {
+            "type": "number",
+            "default": 1.11
+        },
+        "dumbpilot.llamaFrequencyPenalty": {
+            "type": "number",
+            "default": 0.0
+        },
+        "dumbpilot.llamaPresencePenalty": {
+            "type": "number",
+            "default": 0.0
+        },
+        "dumbpilot.llamaRepeatCtx": {
+            "type": "number",
+            "default": 256
+        },
+        "dumbpilot.llamaTemperature": {
+            "type": "number",
+            "default": 0.25
+        },
+        "dumbpilot.llamaTop_p": {
+            "type": "number",
+            "default": 0.95
+        },
+        "dumbpilot.llamaTop_k": {
+            "type": "number",
+            "default": 40
+        },
+        "dumbpilot.llamaTypical_p": {
+            "type": "number",
+            "default": 0.95
+        },
+        "dumbpilot.llamaTailfree_z": {
+            "type": "number",
+            "default": 0.5
+        },
+        "dumbpilot.llamaSeed": {
+            "type": "number",
+            "default": -1
+        },
+        "dumbpilot.fimBeginString": {
+            "type": "string",
+            "default": "<|fim▁begin|>"
+        },
+        "dumbpilot.fimHoleString": {
+            "type": "string",
+            "default": "<|fim▁hole|>"
+        },
+        "dumbpilot.fimEndString": {
+            "type": "string",
+            "default": "<|fim▁end|>"
+        },
+        "dumbpilot.useFillInMiddleRequest": {
+            "type": "boolean",
+            "default": false,
+            "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
+        },
         "dumbpilot.llamaCachePrompt": {
             "type": "boolean",
             "default": true,
@@ -161,7 +217,7 @@
         },
         "dumbpilot.llamaInstructModel": {
             "type": "boolean",
-            "default": "false",
+            "default": false,
             "description": "For use with instruct models"
         },
         "dumbpilot.llamaSystemPrompt": {
diff --git a/src/extension.ts b/src/extension.ts
index 3cf65e5..282cdf1 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -33,14 +33,14 @@ type llamaRequest = {
     n_predict: number,
     mirostat: number,
     repeat_penalty: number,
-    frequency_penalty: number, 
-    presence_penalty: number, 
-    repeat_last_n: number, 
+    frequency_penalty: number,
+    presence_penalty: number,
+    repeat_last_n: number,
     temperature: number,
     top_p: number,
     top_k: number,
     typical_p: number,
-    tfs_z: number, 
+    tfs_z: number,
     seed: number,
     stream: boolean,
     cache_prompt: boolean,
@@ -63,7 +63,7 @@ function clean_text(txt: string): string {
     // these are already done by JSON.stringify()
     //txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
     //txt = txt.replace((/\t/gm, "\\t"));
-    
+
     // FIXME: I don't know if this penalizes some results since most people indent with spaces
     //txt = txt.replace(/\s+/gm, " ");
     return txt;
@@ -131,7 +131,7 @@ export function activate(context: vscode.ExtensionContext) {
             if (config.get("completionEnabled") as boolean === false) {
                 return null;
             }
-            
+
             // Since for every completion we will query the server, we want to filter out
            // automatic completion invokes
            if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
@@ -155,7 +155,7 @@
            const doc_off = document.offsetAt(position);
            let doc_before = doc_text.substring(0, doc_off);
            let doc_after = doc_text.substring(doc_off);
-            
+
            // make it cleaner in hope to reduce the number of tokens
            doc_before = clean_text(doc_before);
            doc_after = clean_text(doc_after);
@@ -173,6 +173,7 @@
            doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;

            const fim = config.get("fimEnabled") as boolean;
+            const fimRequest = config.get("useFillInMiddleRequest") as boolean;
            let req_str: string;
            let request: llamaRequest = {
                n_predict: config.get("llamaMaxtokens") as number,
@@ -190,16 +191,27 @@
                stream: false,
                cache_prompt: config.get("llamaCachePrompt") as boolean
            };
-            
+
+            // check if fill in middle is enabled and fill the request prompt accordingly
            if (fim === true) {
-                req_str = '/infill';
-                request.input_prefix = doc_before;
-                request.input_suffix = doc_after;
+                if (fimRequest === true) {
+                    req_str = '/infill';
+                    request.input_prefix = doc_before;
+                    request.input_suffix = doc_after;
+                } else {
+                    const fim_beg = config.get("fimBeginString") as string;
+                    const fim_hole = config.get("fimHoleString") as string;
+                    const fim_end = config.get("fimEndString") as string;
+                    req_str = '/completion';
+                    request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+                }
            } else {
                req_str = '/completion';
                request.prompt = doc_before;
            }
-            
+
+            console.log(JSON.stringify(request));
+
            let data: llamaData;
            // try to send the request to the running server
            try {
@@ -219,7 +231,7 @@
                if (response.ok === false) {
                    throw new Error("llama server request is not ok??");
                }
-                
+
                data = await response.json() as llamaData;
                const gen_tokens = data.timings.predicted_n;
                const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
@@ -234,7 +246,7 @@
                showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
                return null;
            };
-            
+
            result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
            return result;
        },
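
For reference, a minimal sketch of what the two FIM paths added above end up sending to the llama.cpp server, assuming the default token strings from package.json; the `doc_before`/`doc_after` snippet is made up for illustration:

```ts
// Sketch only: mirrors the fim/fimRequest branch added in src/extension.ts.
// The token defaults below are the ones from package.json; the code text
// around the "cursor" is a hypothetical example.
const fim_beg = '<|fim▁begin|>';  // dumbpilot.fimBeginString
const fim_hole = '<|fim▁hole|>';  // dumbpilot.fimHoleString
const fim_end = '<|fim▁end|>';    // dumbpilot.fimEndString

const doc_before = 'function add(a: number, b: number) {\n\treturn ';
const doc_after = ';\n}\n';

// useFillInMiddleRequest === true: POST to /infill, which takes the
// prefix and suffix as separate fields and inserts the model's own
// FIM tokens server-side
const infillFields = { input_prefix: doc_before, input_suffix: doc_after };

// useFillInMiddleRequest === false: POST to /completion with a single
// prompt string where the configured tokens delimit the hole to fill
const prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
```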