diff --git a/TODO.md b/TODO.md
index 73164f5..f4b3899 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,4 +1,4 @@
-[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */ 
+[x] - in extensions.json add suffix for languages that require it such as css where comments are: /* stuff */
 [] - test cancel token
 [] - add fill in middle
 [x] - add config option to disable the extension
@@ -9,4 +9,5 @@
 [] - add an icon
 [] - option to backup and restore model settings
 [] - add a window to quickly modify model configs
-[] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
\ No newline at end of file
+[] - decorate ai generated text https://github.com/microsoft/vscode-extension-samples/tree/main/decorator-sample
+[] - when trying to use completion when there is an active selection either substitute the selection or use the selection as context instead of the whole file
\ No newline at end of file
diff --git a/package.json b/package.json
index e400124..09b0419 100644
--- a/package.json
+++ b/package.json
@@ -137,23 +137,79 @@
             "description": "Enable Fill in Middle mode, defaults to Up-to cursor context"
         },
         "dumbpilot.llamaHost": {
-            "type": "string", 
+            "type": "string",
             "default": "http://0.0.0.0:8080",
             "description": "llama.cpp server address"
         },
-        "dumbpilot.llamaCtxsize": {"type": "number", "default": 2048},
-        "dumbpilot.llamaMaxtokens": {"type": "number", "default": -1},
-        "dumbpilot.llamaMirostat": {"type": "number", "default": 0},
-        "dumbpilot.llamaRepeatPenalty": {"type": "number", "default": 1.11},
-        "dumbpilot.llamaFrequencyPenalty": {"type": "number", "default": 0.0},
-        "dumbpilot.llamaPresencePenalty": {"type": "number", "default": 0.0},
-        "dumbpilot.llamaRepeatCtx": {"type": "number", "default": 256},
-        "dumbpilot.llamaTemperature": {"type": "number", "default": 0.25},
-        "dumbpilot.llamaTop_p": {"type": "number", "default": 0.95},
-        "dumbpilot.llamaTop_k": {"type": "number", "default": 40},
-        "dumbpilot.llamaTypical_p": {"type": "number", "default": 0.95},
-        "dumbpilot.llamaTailfree_z": {"type": "number", "default": 0.5},
-        "dumbpilot.llamaSeed": {"type": "number", "default": -1},
+        "dumbpilot.llamaCtxsize": {
+            "type": "number",
+            "default": 2048
+        },
+        "dumbpilot.llamaMaxtokens": {
+            "type": "number",
+            "default": -1
+        },
+        "dumbpilot.llamaMirostat": {
+            "type": "number",
+            "default": 0
+        },
+        "dumbpilot.llamaRepeatPenalty": {
+            "type": "number",
+            "default": 1.11
+        },
+        "dumbpilot.llamaFrequencyPenalty": {
+            "type": "number",
+            "default": 0.0
+        },
+        "dumbpilot.llamaPresencePenalty": {
+            "type": "number",
+            "default": 0.0
+        },
+        "dumbpilot.llamaRepeatCtx": {
+            "type": "number",
+            "default": 256
+        },
+        "dumbpilot.llamaTemperature": {
+            "type": "number",
+            "default": 0.25
+        },
+        "dumbpilot.llamaTop_p": {
+            "type": "number",
+            "default": 0.95
+        },
+        "dumbpilot.llamaTop_k": {
+            "type": "number",
+            "default": 40
+        },
+        "dumbpilot.llamaTypical_p": {
+            "type": "number",
+            "default": 0.95
+        },
+        "dumbpilot.llamaTailfree_z": {
+            "type": "number",
+            "default": 0.5
+        },
+        "dumbpilot.llamaSeed": {
+            "type": "number",
+            "default": -1
+        },
+        "dumbpilot.fimBeginString": {
+            "type": "string",
+            "default": "<|fim▁begin|>"
+        },
+        "dumbpilot.fimHoleString": {
+            "type": "string",
+            "default": "<|fim▁hole|>"
+        },
+        "dumbpilot.fimEndString": {
+            "type": "string",
+            "default": "<|fim▁end|>"
+        },
+        "dumbpilot.useFillInMiddleRequest": {
+            "type": "boolean",
+            "default": false,
+            "description": "Use the fill in middle request type provided by llama.cpp server, otherwise use the FIM token strings to delimit the text"
+        },
         "dumbpilot.llamaCachePrompt": {
             "type": "boolean",
             "default": true,
@@ -161,7 +217,7 @@
         },
         "dumbpilot.llamaInstructModel": {
             "type": "boolean",
-            "default": "false",
+            "default": false,
             "description": "For use with instruct models"
         },
         "dumbpilot.llamaSystemPrompt": {
diff --git a/src/extension.ts b/src/extension.ts
index 3cf65e5..282cdf1 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -33,14 +33,14 @@ type llamaRequest = {
     n_predict: number,
     mirostat: number,
     repeat_penalty: number,
-    frequency_penalty: number, 
-    presence_penalty: number, 
-    repeat_last_n: number, 
+    frequency_penalty: number,
+    presence_penalty: number,
+    repeat_last_n: number,
     temperature: number,
     top_p: number,
     top_k: number,
     typical_p: number,
-    tfs_z: number, 
+    tfs_z: number,
     seed: number,
     stream: boolean,
     cache_prompt: boolean,
@@ -63,7 +63,7 @@ function clean_text(txt: string): string {
     // these are already done by JSON.stringify()
     //txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
     //txt = txt.replace((/\t/gm, "\\t"));
-    
+
     // FIXME: I don't know if this penalizes some results since most people indent with spaces
     //txt = txt.replace(/\s+/gm, " ");
     return txt;
@@ -131,7 +131,7 @@ export function activate(context: vscode.ExtensionContext) {
             if (config.get("completionEnabled") as boolean === false) {
                 return null;
             }
-            
+
             // Since for every completion we will query the server, we want to filter out
            // automatic completion invokes
            if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
@@ -155,7 +155,7 @@
            const doc_off = document.offsetAt(position);
            let doc_before = doc_text.substring(0, doc_off);
            let doc_after = doc_text.substring(doc_off);
-            
+
            // make it cleaner in hope to reduce the number of tokens
            doc_before = clean_text(doc_before);
            doc_after = clean_text(doc_after);
@@ -173,6 +173,7 @@
            doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;

            const fim = config.get("fimEnabled") as boolean;
+            const fimRequest = config.get("useFillInMiddleRequest") as boolean;
            let req_str: string;
            let request: llamaRequest = {
                n_predict: config.get("llamaMaxtokens") as number,
@@ -190,16 +191,27 @@
                stream: false,
                cache_prompt: config.get("llamaCachePrompt") as boolean
            };
-            
+
+            // check if fill in middle is enabled and fill the request prompt accordingly
            if (fim === true) {
-                req_str = '/infill';
-                request.input_prefix = doc_before;
-                request.input_suffix = doc_after;
+                if (fimRequest === true) {
+                    req_str = '/infill';
+                    request.input_prefix = doc_before;
+                    request.input_suffix = doc_after;
+                } else {
+                    const fim_beg = config.get("fimBeginString") as string;
+                    const fim_hole = config.get("fimHoleString") as string;
+                    const fim_end = config.get("fimEndString") as string;
+                    req_str = '/completion';
+                    request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+                }
            } else {
                req_str = '/completion';
                request.prompt = doc_before;
            }
-            
+
+            console.log(JSON.stringify(request));
+
            let data: llamaData;
            // try to send the request to the running server
            try {
@@ -219,7 +231,7 @@
                if (response.ok === false) {
                    throw new Error("llama server request is not ok??");
                }
-                
+
                data = await response.json() as llamaData;
                const gen_tokens = data.timings.predicted_n;
                const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
@@ -234,7 +246,7 @@
                showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
                return null;
            };
-            
+
            result.items.push({insertText: data.content, range: new vscode.Range(position, position)});
            return result;
        },
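
For reference, a minimal sketch of what the two FIM paths added above end up sending to the llama.cpp server, assuming the default token strings from package.json; the `doc_before`/`doc_after` snippet is made up for illustration:

```ts
// Sketch only: mirrors the fim/fimRequest branch added in src/extension.ts.
// The token defaults below are the ones from package.json; the code text
// around the "cursor" is a hypothetical example.
const fim_beg = '<|fim▁begin|>';  // dumbpilot.fimBeginString
const fim_hole = '<|fim▁hole|>';  // dumbpilot.fimHoleString
const fim_end = '<|fim▁end|>';    // dumbpilot.fimEndString

const doc_before = 'function add(a: number, b: number) {\n\treturn ';
const doc_after = ';\n}\n';

// useFillInMiddleRequest === true: POST to /infill, which takes the
// prefix and suffix as separate fields and inserts the model's own
// FIM tokens server-side
const infillFields = { input_prefix: doc_before, input_suffix: doc_after };

// useFillInMiddleRequest === false: POST to /completion with a single
// prompt string where the configured tokens delimit the hole to fill
const prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
```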