Skip to content

Commit b08b371

Browse files
committed
allow hordeconfig to set a max ctx length too.
1 parent 278427d commit b08b371

File tree

2 files changed

+15
-9
lines changed

2 files changed

+15
-9
lines changed

gpttype_adapter.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
11291129
params.n_threads = original_threads;
11301130
time1 = timer_check();
11311131
timer_start();
1132-
printf("\n");
1132+
if(debugmode!=-1)
1133+
{
1134+
printf("\n");
1135+
}
11331136
}
11341137

11351138
unsigned int eosID = 0;

koboldcpp.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,11 @@ def utfprint(str):
221221
#################################################################
222222
friendlymodelname = "concedo/koboldcpp" # local kobold api apparently needs a hardcoded known HF model name
223223
maxctx = 2048
224-
maxlen = 256
224+
maxhordectx = 1024
225+
maxhordelen = 256
225226
modelbusy = False
226227
defaultport = 5001
227-
KcppVersion = "1.31.1"
228+
KcppVersion = "1.31.2"
228229
showdebug = True
229230

230231
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
@@ -353,7 +354,7 @@ async def handle_request(self, genparams, newprompt, basic_api_flag, stream_flag
353354

354355

355356
def do_GET(self):
356-
global maxctx, maxlen, friendlymodelname, KcppVersion, streamLock
357+
global maxctx, maxhordelen, friendlymodelname, KcppVersion, streamLock
357358
self.path = self.path.rstrip('/')
358359
response_body = None
359360

@@ -379,10 +380,10 @@ def do_GET(self):
379380
response_body = (json.dumps({'result': friendlymodelname }).encode())
380381

381382
elif self.path.endswith(('/api/v1/config/max_length', '/api/latest/config/max_length')):
382-
response_body = (json.dumps({"value": maxlen}).encode())
383+
response_body = (json.dumps({"value": maxhordelen}).encode())
383384

384385
elif self.path.endswith(('/api/v1/config/max_context_length', '/api/latest/config/max_context_length')):
385-
response_body = (json.dumps({"value": maxctx}).encode())
386+
response_body = (json.dumps({"value": min(maxctx,maxhordectx)}).encode())
386387

387388
elif self.path.endswith(('/api/v1/config/soft_prompt', '/api/latest/config/soft_prompt')):
388389
response_body = (json.dumps({"value":""}).encode())
@@ -723,10 +724,12 @@ def main(args):
723724
sys.exit(2)
724725

725726
if args.hordeconfig and args.hordeconfig[0]!="":
726-
global friendlymodelname, maxlen, showdebug
727+
# maxhordectx must be declared global too: without it the assignment further down only binds a local, and the max_context_length endpoint keeps reporting the module default.
global friendlymodelname, maxhordelen, maxhordectx, showdebug
727728
friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
728729
if len(args.hordeconfig) > 1:
729-
maxlen = int(args.hordeconfig[1])
730+
maxhordelen = int(args.hordeconfig[1])
731+
if len(args.hordeconfig) > 2:
732+
maxhordectx = int(args.hordeconfig[2])
730733
if args.debugmode == 0:
731734
args.debugmode = -1
732735

@@ -855,7 +858,7 @@ def main(args):
855858
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
856859
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0)
857860
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')
858-
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. An optional second parameter sets the horde max gen length.",metavar=('[hordename]', '[hordelength]'), nargs='+')
861+
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength and max ctxlen.",metavar=('[hordename]', '[hordelength] [hordectx]'), nargs='+')
859862
compatgroup = parser.add_mutually_exclusive_group()
860863
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
861864
compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)

0 commit comments

Comments
 (0)