@@ -1,62 +1,24 @@
 import { ok } from 'assert';
 import * as vscode from 'vscode';
 import commentPrefix from './comments.json';
-// llama.cpp server response format
-type llamaData = {
-	content: string,
-	generation_settings: JSON,
-	model: string,
-	prompt: string,
-	stopped_eos: boolean,
-	stopped_limit: boolean,
-	stopped_word: boolean,
-	stopping_word: string,
-	timings: {
-		predicted_ms: number,
-		predicted_n: number,
-		predicted_per_second: number,
-		predicted_per_token_ms: number,
-		prompt_ms: number,
-		prompt_n: number,
-		prompt_per_second: number,
-		prompt_per_token_ms: number
-	},
-	tokens_cached: number,
-	tokens_evaluated: number,
-	tokens_predicted: number,
-	truncated: boolean
-};
-type llamaRequest = {
-	n_predict: number,
-	mirostat: number,
-	repeat_penalty: number,
-	frequency_penalty: number,
-	presence_penalty: number,
-	repeat_last_n: number,
-	temperature: number,
-	top_p: number,
-	top_k: number,
-	typical_p: number,
-	tfs_z: number,
-	seed: number,
-	stream: boolean,
-	cache_prompt: boolean,
-	prompt?: string,
-	input_prefix?: string,
-	input_suffix?: string
-};
-type fetchErrorCause = {
-	errno: number,
-	code: string,
-	syscall: string,
-	address: string,
-	port: number
-};
+import {
+	LlamaRequest,
+	createLlamacppRequest,
+	llamacppRequestEndpoint,
+	llamacppMakeRequest,
+} from './llamacpp-api';
+import {
+	OpenAICompletionRequest,
+	createOpenAIAPIRequest,
+	openAIAPIRequestEndpoint,
+	openAIMakeRequest,
+} from './openai-api';
+import {
+	FetchErrorCause,
+	ResponseData,
+	showMessageWithTimeout,
+	showPendingStatusBar,
+} from './common';
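+// request/response types and the fetch helpers now live in llamacpp-api.ts,
+// openai-api.ts and common.ts instead of being defined inline here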
 // clean up the document
 function clean_text(txt: string): string {
@@ -69,56 +31,26 @@ function clean_text(txt: string): string {
 	return txt;
 }
-// Show a message notification with a set timeout
-async function showMessageWithTimeout(message: string, timeout: number): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Notification,
-			title: message,
-			cancellable: false,
-		}, (progress, token) => {
-			token.onCancellationRequested(() => {});
-			// This is magic I don't understand
-			const p = new Promise<void>((resolve) => {
-				setTimeout(resolve, timeout);
-			});
-			return p;
-		});
-};
-// show a message on the status bar until the promise is resolved
-async function showPendingStatusBar(message: string, operation: Promise<any>): Promise<void> {
-	void vscode.window.withProgress(
-		{
-			location: vscode.ProgressLocation.Window,
-			title: message,
-		}, () => operation).then((aok) => {}, (err) => {});
-	// we already resolve the operation elsewhere
-}
 export function activate(context: vscode.ExtensionContext) {
 	console.log('dumbpilot is now active');
-	let config = vscode.workspace.getConfiguration("dumbpilot");
+	let config = vscode.workspace.getConfiguration('dumbpilot');
 	// handle completion changes
-	context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
-		config = vscode.workspace.getConfiguration("dumbpilot");
-	}));
+	context.subscriptions.push(
+		vscode.workspace.onDidChangeConfiguration((e) => {
+			config = vscode.workspace.getConfiguration('dumbpilot');
+		})
+	);
 	// TODO: work with local configurations
-	let disposable = vscode.commands.registerCommand("dumbpilot.enableCompletion", () => {
-		config.update("completionEnabled", true);
+	let disposable = vscode.commands.registerCommand('dumbpilot.enableCompletion', () => {
+		config.update('completionEnabled', true);
 	});
 	context.subscriptions.push(disposable);
-	disposable = vscode.commands.registerCommand("dumbpilot.disableCompletion", () => {
-		config.update("completionEnabled", false);
+	disposable = vscode.commands.registerCommand('dumbpilot.disableCompletion', () => {
+		config.update('completionEnabled', false);
 	});
 	// Register a new provider of inline completions, this does not decide how it is invoked
@@ -126,9 +58,8 @@ export function activate(context: vscode.ExtensionContext) {
 	// https://github.com/microsoft/vscode-extension-samples/blob/main/inline-completions/src/extension.ts
 	const provider: vscode.InlineCompletionItemProvider = {
 		async provideInlineCompletionItems(document, position, context, token) {
-			// disable if predictive completion is disabled
-			if (config.get("completionEnabled") as boolean === false) {
+			if ((config.get('completionEnabled') as boolean) === false) {
 				return null;
 			}
@@ -140,14 +71,14 @@ export function activate(context: vscode.ExtensionContext) {
 			// FIXME: I don't know if this works
 			token.onCancellationRequested(() => {
-				console.log("dumbpilot: operation cancelled, may still be running on the server");
+				console.log('dumbpilot: operation cancelled, may still be running on the server');
 				return null;
 			});
 			//console.log('dumbpilot: completion invoked at position: line=' + position.line + ' char=' + position.character);
 			const result: vscode.InlineCompletionList = {
-				items: []
+				items: [],
 			};
 			// Get the document's text and position to send to the model
@@ -172,86 +103,30 @@ export function activate(context: vscode.ExtensionContext) {
 			// FIXME: is there a more efficient way?
 			doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
-			const fim = config.get("fimEnabled") as boolean;
-			const fimRequest = config.get("useFillInMiddleRequest") as boolean;
-			let req_str: string;
-			let request: llamaRequest = {
-				n_predict: config.get("llamaMaxtokens") as number,
-				mirostat: config.get("llamaMirostat") as number,
-				repeat_penalty: config.get("llamaRepeatPenalty") as number,
-				frequency_penalty: config.get("llamaFrequencyPenalty,") as number,
-				presence_penalty: config.get("llamaPresencePenalty,") as number,
-				repeat_last_n: config.get("llamaRepeatCtx,") as number,
-				temperature: config.get("llamaTemperature") as number,
-				top_p: config.get("llamaTop_p") as number,
-				top_k: config.get("llamaTop_k") as number,
-				typical_p: config.get("llamaTypical_p") as number,
-				tfs_z: config.get("llamaTailfree_z,") as number,
-				seed: config.get("llamaSeed") as number,
-				stream: false,
-				cache_prompt: config.get("llamaCachePrompt") as boolean
-			};
-			// check if fill in middle is enabled and fill the request prompt accordingly
-			if (fim === true) {
-				if (fimRequest === true) {
-					req_str = '/infill';
-					request.input_prefix = doc_before;
-					request.input_suffix = doc_after;
-				} else {
-					const fim_beg = config.get("fimBeginString") as string;
-					const fim_hole = config.get("fimHoleString") as string;
-					const fim_end = config.get("fimEndString") as string;
-					req_str = '/completion';
-					request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
-				}
+			// actually make the request
+			let data: ResponseData = { content: '', tokens: 0, time: 0 };
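+			// pick the OpenAI-compatible API or the native llama.cpp API based on the
+			// llamaUseOpenAIAPI setting; both paths fill the same ResponseData shape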
+			if (config.get('llamaUseOpenAIAPI') === true) {
+				const request: OpenAICompletionRequest = createOpenAIAPIRequest(
+					config,
+					doc_before,
+					doc_after
+				);
+				const endpoint: string = openAIAPIRequestEndpoint(config);
+				data = await openAIMakeRequest(request, endpoint);
 			} else {
-				req_str = '/completion';
-				request.prompt = doc_before;
+				const request: LlamaRequest = createLlamacppRequest(config, doc_before, doc_after);
+				const endpoint: string = llamacppRequestEndpoint(config);
+				data = await llamacppMakeRequest(request, endpoint);
 			}
-			console.log(JSON.stringify(request));
-			let data: llamaData;
-			// try to send the request to the running server
-			try {
-				const response_promise = fetch(
-					(config.get("llamaHost") as string).concat(req_str),
-					{
-						method: 'POST',
-						headers: {
-							'content-type': 'application/json; charset=UTF-8'
-						},
-						body: JSON.stringify(request)
-					}
-				);
-				showPendingStatusBar("dumbpilot waiting", response_promise);
-				const response = await response_promise;
-				if (response.ok === false) {
-					throw new Error("llama server request is not ok??");
-				}
-				data = await response.json() as llamaData;
-				const gen_tokens = data.timings.predicted_n;
-				const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
-				showMessageWithTimeout(`predicted ${gen_tokens} tokens in ${gen_time} seconds`, 1500);
-			} catch (e: any) {
-				const err = e as TypeError;
-				const cause: fetchErrorCause = err.cause as fetchErrorCause;
-				const estr: string = err.message + ' ' + cause.code + ' at ' + cause.address + ':' + cause.port;
-				// let the user know something went wrong
-				// TODO: maybe add a retry action or something
-				showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
-				return null;
-			};
-			result.items.push({ insertText: data.content, range: new vscode.Range(position, position) });
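+			// an empty range at the cursor makes this a pure insertion, nothing is replaced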
+			result.items.push({
+				insertText: data.content,
+				range: new vscode.Range(position, position),
+			});
 			return result;
 		},
 	};
 	vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, provider);
 }
 // This method is called when your extension is deactivated