import { ok } from 'assert';
import * as vscode from 'vscode';
import commentPrefix from './comments.json';


/**
 * llama.cpp server response format.
 *
 * Shape of the JSON body this extension expects back from the server after a
 * completion request. Within this file only `content` is read; the remaining
 * fields are declared so the payload is fully typed.
 */
type llamaData = {
	/** Generated text; this is what gets inserted into the editor. */
	content: string,
	// Arbitrary JSON object echoed back by the server. Typed as an open record:
	// the global `JSON` type describes the parse/stringify namespace object,
	// not a JSON value, so it would reject real payloads.
	generation_settings: Record<string, unknown>,
	model: string,
	prompt: string,
	stopped_eos: boolean,
	stopped_limit: boolean,
	stopped_word: boolean,
	stopping_word: string,
	timings: {
		predicted_ms: number,
		predicted_n: number,
		predicted_per_second: number,
		predicted_per_token_ms: number,
		prompt_ms: number,
		prompt_n: number,
		prompt_per_second: number,
		prompt_per_token_ms: number
	},
	tokens_cached: number,
	tokens_evaluated: number,
	tokens_predicted: number,
	truncated: boolean
};

/**
 * JSON body sent to the server's `/completion` route.
 *
 * Every field is required; the object is serialized with `JSON.stringify`
 * before being POSTed.
 */
type llamaCompletionRequest = {
	// generation length
	n_predict: number;

	// sampling / penalty knobs, forwarded to the server as-is
	mirostat: number;
	repeat_penalty: number;
	frequency_penalty: number;
	presence_penalty: number;
	repeat_last_n: number;
	temperature: number;
	top_p: number;
	top_k: number;
	typical_p: number;
	tfs_z: number;
	seed: number;

	// transport mode
	stream: boolean;

	// text the model should continue
	prompt: string;
};

/**
 * Request body variant that carries the text before and after the cursor
 * instead of a single prompt.
 *
 * Not referenced by the code visible in this file; presumably meant for a
 * fill-in-the-middle request against the server (TODO confirm the route).
 */
type llamaFillRequest = {
	// generation length
	n_predict: number;

	// sampling / penalty knobs, same set as the completion request
	mirostat: number;
	repeat_penalty: number;
	frequency_penalty: number;
	presence_penalty: number;
	repeat_last_n: number;
	temperature: number;
	top_p: number;
	top_k: number;
	typical_p: number;
	tfs_z: number;
	seed: number;

	// transport mode
	stream: boolean;

	// text surrounding the insertion point
	input_prefix: string;
	input_suffix: string;
};


// Default generation parameters. Each value below is copied verbatim into the
// request body sent to the server; the exact meaning of every knob is defined
// by the llama.cpp server, not by this extension.

// Not referenced by the visible code; presumably reserved for pruning the
// prompt to the model's context window (TODO confirm).
const llama_ctxsize = 2048;
// Sent as `n_predict`. NOTE(review): -1 presumably means "no limit" on the
// server side — confirm against the llama.cpp server docs.
const llama_maxtokens = -1;
// Sent as `mirostat`.
const llama_mirostat = 0;
// Sent as `repeat_penalty`.
const llama_repeat_penalty = 1.11;
// Sent as `frequency_penalty`.
const llama_frequency_penalty = 0.0;
// Sent as `presence_penalty`.
const llama_presence_penalty = 0.0;
// Sent as `repeat_last_n`.
const llama_repeat_ctx = 256;
// Sent as `temperature`.
const llama_temperature = 0.25;
// Sent as `top_p`.
const llama_top_p = 0.95;
// Sent as `top_k`.
const llama_top_k = 40;
// Sent as `typical_p`.
const llama_typical_p = 0.95;
// Sent as `tfs_z`.
const llama_tailfree_z = 0.5;
// Sent as `seed`. NOTE(review): -1 presumably requests a random seed — confirm.
const llama_session_seed = -1;

// Base URL of the llama.cpp server. This is a *destination* address, so it
// uses the IPv4 loopback: 0.0.0.0 is the wildcard bind address and is not a
// portable connect target (it is rejected on some platforms).
const llama_host = "http://127.0.0.1:8080";


/**
 * Normalization hook for document text before it is sent to the model.
 *
 * Currently a pass-through: the input is returned unchanged.
 *
 * - Escaping newlines and tabs is unnecessary here because the request body
 *   is built with JSON.stringify(), which already does it.
 * - FIXME: collapsing runs of whitespace into a single space was considered,
 *   but it is unclear whether that would penalize results, since most people
 *   indent with spaces.
 *
 * @param txt Raw text taken from the document.
 * @returns The same text, unmodified.
 */
function clean_text(txt: string): string {
	const cleaned = txt;
	return cleaned;
}


/**
 * Look up the comment delimiters registered for a language id in comments.json.
 *
 * @param lang VS Code language identifier of the active document.
 * @returns `[prefix, suffix]` when the language has an entry whose first
 *          element is a string (a missing or non-string suffix becomes `''`),
 *          otherwise `undefined`.
 */
function comment_delimiters(lang: string): [string, string] | undefined {
	const entry: unknown = (commentPrefix as Record<string, unknown>)[lang];
	if (!Array.isArray(entry) || typeof entry[0] !== 'string') {
		return undefined;
	}
	const suffix: unknown = entry[1];
	return [entry[0], typeof suffix === 'string' ? suffix : ''];
}

/**
 * Extension entry point, called by VS Code when the extension is activated.
 *
 * Registers the `dumbpilot.enableCompletion` / `dumbpilot.disableCompletion`
 * commands and an inline completion provider that forwards the text before
 * the cursor to the llama.cpp server. Every registration is added to
 * `context.subscriptions` so it is disposed together with the extension.
 *
 * @param context Extension context supplied by VS Code.
 */
export function activate(context: vscode.ExtensionContext) {

	console.log('dumbpilot is now active');

	const config = vscode.workspace.getConfiguration("dumbpilot");
	// An unset value counts as enabled: the provider only bails out on an explicit `false`.
	let completion_enabled = config.get<boolean>("completionEnabled", true);

	// TODO: work with local configurations
	// Flip the in-memory flag and persist it. A failed write (the returned
	// Thenable can reject) is logged instead of surfacing as an unhandled rejection.
	const set_completion_enabled = (enabled: boolean): void => {
		completion_enabled = enabled;
		config.update("completionEnabled", enabled).then(undefined, (err: unknown) => {
			const reason = err instanceof Error ? err.message : String(err);
			console.log('dumbpilot: could not persist completionEnabled: ' + reason);
		});
	};

	context.subscriptions.push(
		vscode.commands.registerCommand("dumbpilot.enableCompletion", () => set_completion_enabled(true)),
		vscode.commands.registerCommand("dumbpilot.disableCompletion", () => set_completion_enabled(false))
	);

	// Register a new provider of inline completions, this does not decide how it is invoked
	// only what the completion should be
	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
	const provider: vscode.InlineCompletionItemProvider = {
		/**
		 * Ask the server to continue the text that precedes the cursor.
		 *
		 * @returns A list holding one completion inserted at `position`, or
		 *          `null` when completion is disabled, the trigger was
		 *          automatic, the request was cancelled, or the request failed.
		 */
		async provideInlineCompletionItems(document, position, inline_context, token) {

			if (completion_enabled === false) {
				return null;
			}

			// Since for every completion we want to query the server, we want to filter out
			// automatic completion invokes
			if (inline_context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
				return null;
			}

			// Nothing to do if the request was cancelled before we even started.
			if (token.isCancellationRequested) {
				return null;
			}

			const result: vscode.InlineCompletionList = {
				items: []
			};

			// Only the text before the cursor is sent: the completion request has no
			// field for the text after it.
			const doc_off = document.offsetAt(position);
			// make it cleaner in hope to reduce the number of tokens
			let doc_before = clean_text(document.getText().substring(0, doc_off));

			// TODO: prune text up to a maximum context length

			// Prefix the filename in a comment. Languages without an entry in
			// comments.json get no header rather than crashing the provider.
			const delimiters = comment_delimiters(document.languageId);
			if (delimiters !== undefined) {
				const [pfx, sfx] = delimiters;
				doc_before = pfx + ' ' + document.fileName + sfx + '\n' + doc_before;
			}

			// server request object
			const request: llamaCompletionRequest = {
				n_predict: llama_maxtokens,
				mirostat: llama_mirostat,
				repeat_penalty: llama_repeat_penalty,
				frequency_penalty: llama_frequency_penalty,
				presence_penalty: llama_presence_penalty,
				repeat_last_n: llama_repeat_ctx,
				temperature: llama_temperature,
				top_p: llama_top_p,
				top_k: llama_top_k,
				typical_p: llama_typical_p,
				tfs_z: llama_tailfree_z,
				seed: llama_session_seed,
				stream: false,
				prompt: doc_before,
			};

			// Tie editor cancellation to the HTTP request: aborting makes the pending
			// fetch reject, which lands in the catch below and yields `null`.
			const abort = new AbortController();
			const cancel_listener = token.onCancellationRequested(() => {
				console.log("dumbpilot: operation cancelled, may still be running on the server");
				abort.abort();
			});

			// try to send the request to the running server
			try {
				const response = await fetch(
					llama_host.concat('/completion'),
					{
						method: 'POST',
						headers: {
							'content-type': 'application/json; charset=UTF-8'
						},
						body: JSON.stringify(request),
						signal: abort.signal
					}
				);
				if (response.ok === false) {
					throw new Error("llama server request is not ok??");
				}

				// The body is untrusted until checked: only accept a string `content`.
				const data = (await response.json()) as Partial<llamaData>;
				if (typeof data.content !== 'string') {
					throw new Error("llama server response has no content");
				}

				result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
				return result;

			} catch (e: unknown) {
				console.log('dumbpilot: ' + (e instanceof Error ? e.message : String(e)));
				return null;
			} finally {
				// Always release the cancellation listener, whatever the outcome.
				cancel_listener.dispose();
			}
		},
	};
	context.subscriptions.push(
		vscode.languages.registerInlineCompletionItemProvider({pattern: '**'}, provider)
	);
}

/**
 * Extension teardown hook, called when the extension is deactivated.
 *
 * Performs no work and returns nothing.
 */
export function deactivate(): void {
	return;
}