@@ -33,14 +33,14 @@ type llamaRequest = {
	n_predict: number,
	mirostat: number,
	repeat_penalty: number,
	frequency_penalty: number,
	presence_penalty: number,
	repeat_last_n: number,
	temperature: number,
	top_p: number,
	top_k: number,
	typical_p: number,
	tfs_z: number,
	seed: number,
	stream: boolean,
	cache_prompt: boolean,
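For reference, these fields are the sampling and generation parameters that llama.cpp's HTTP server accepts in its request body. A minimal sketch of a request built from this type follows; the values are illustrative placeholders rather than the extension's configured defaults, and it assumes the remaining members of llamaRequest (prompt, input_prefix, input_suffix, which later hunks assign conditionally) are declared optional.

	// illustrative values only, not the extension's defaults
	const example: llamaRequest = {
		n_predict: 128,          // upper bound on generated tokens
		mirostat: 0,             // 0 = mirostat sampling disabled
		repeat_penalty: 1.1,
		frequency_penalty: 0.0,
		presence_penalty: 0.0,
		repeat_last_n: 64,
		temperature: 0.7,
		top_p: 0.9,
		top_k: 40,
		typical_p: 1.0,
		tfs_z: 1.0,
		seed: -1,                // -1 lets the server pick a random seed
		stream: false,
		cache_prompt: true,
	};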
@@ -63,7 +63,7 @@ function clean_text(txt: string): string {
	// these are already done by JSON.stringify()
	//txt = txt.replace(/(\r\n|\n|\r)/gm, "\\n");
	//txt = txt.replace((/\t/gm, "\\t"));
	// FIXME: I don't know if this penalizes some results since most people indent with spaces
	//txt = txt.replace(/\s+/gm, " ");
	return txt;
@@ -131,7 +131,7 @@ export function activate(context: vscode.ExtensionContext) {
		if (config.get("completionEnabled") as boolean === false) {
			return null;
		}
		// Since for every completion we will query the server, we want to filter out
		// automatic completion invokes
		if (context.triggerKind === vscode.InlineCompletionTriggerKind.Automatic) {
@@ -155,7 +155,7 @@ export function activate(context: vscode.ExtensionContext) {
		const doc_off = document.offsetAt(position);
		let doc_before = doc_text.substring(0, doc_off);
		let doc_after = doc_text.substring(doc_off);
		// make it cleaner in hope to reduce the number of tokens
		doc_before = clean_text(doc_before);
		doc_after = clean_text(doc_after);
@@ -173,6 +173,7 @@ export function activate(context: vscode.ExtensionContext) {
		doc_before = pfx + ' ' + fname + sfx + '\n' + doc_before;
		const fim = config.get("fimEnabled") as boolean;
+		const fimRequest = config.get("useFillInMiddleRequest") as boolean;
		let req_str: string;
		let request: llamaRequest = {
			n_predict: config.get("llamaMaxtokens") as number,
@@ -190,16 +191,27 @@ export function activate(context: vscode.ExtensionContext) {
			stream: false,
			cache_prompt: config.get("llamaCachePrompt") as boolean
		};
+		// check if fill in middle is enabled and fill the request prompt accordingly
		if (fim === true) {
-			req_str = '/infill';
-			request.input_prefix = doc_before;
-			request.input_suffix = doc_after;
+			if (fimRequest === true) {
+				req_str = '/infill';
+				request.input_prefix = doc_before;
+				request.input_suffix = doc_after;
+			} else {
+				const fim_beg = config.get("fimBeginString") as string;
+				const fim_hole = config.get("fimHoleString") as string;
+				const fim_end = config.get("fimEndString") as string;
+				req_str = '/completion';
+				request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
+			}
		} else {
			req_str = '/completion';
			request.prompt = doc_before;
		}
+		console.log(JSON.stringify(request));
		let data: llamaData;
		// try to send the request to the running server
		try {
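The branch added above distinguishes two ways of requesting a fill-in-the-middle completion. When useFillInMiddleRequest is enabled, the extension calls llama.cpp's dedicated /infill endpoint, passing the text before and after the cursor as input_prefix and input_suffix and letting the server insert the model's FIM tokens. When it is disabled, the extension builds an ordinary /completion prompt by splicing the configured sentinel strings around the hole itself. A rough sketch of that second path, assuming CodeLlama-style sentinels for fimBeginString, fimHoleString and fimEndString (the actual configured values may differ):

	// hypothetical sentinel values; in the extension they come from the settings
	const fim_beg  = '<PRE> ';
	const fim_hole = ' <SUF>';
	const fim_end  = ' <MID>';

	// doc_before / doc_after are the cleaned texts around the cursor
	request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
	// e.g. "<PRE> function add(a, b) {\n\treturn <SUF>\n} <MID>"; the model is
	// expected to continue after <MID> with the text that fills the hole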
@@ -219,7 +231,7 @@ export function activate(context: vscode.ExtensionContext) {
			if (response.ok === false) {
				throw new Error("llama server request is not ok??");
			}
			data = await response.json() as llamaData;
			const gen_tokens = data.timings.predicted_n;
			const gen_time = (data.timings.predicted_ms / 1000).toFixed(2);
@@ -234,7 +246,7 @@ export function activate(context: vscode.ExtensionContext) {
			showMessageWithTimeout('dumbpilot error: ' + estr, 3000);
			return null;
		};
		result.items.push({ insertText: data.content, range: new vscode.Range(position, position) });
		return result;
	},
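The fetch call that sends the request sits between the last two hunks and is not quoted here. As a non-authoritative sketch of the round trip, assuming the server address comes from an extension setting (the key name llamaHost below is made up for illustration):

	// hypothetical config key; the real setting name may differ
	const endpoint = (config.get("llamaHost") as string) + req_str;   // e.g. "http://127.0.0.1:8080/completion"
	const response = await fetch(endpoint, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(request),
	});
	if (response.ok === false) {
		throw new Error("llama server request is not ok??");
	}
	const data = await response.json() as llamaData;
	// data.content is the generated text pushed as the inline completion item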