@@ -387,7 +387,6 @@ else if ("user".equals(role)) {
387387 final Llama3 .Options options = httpSession .options ();
388388 final List <Integer > conversationTokens = httpSession .conversationTokens ();
389389 int startPosition = conversationTokens .size ();
390- System .out .format ("Tokens1 (start-pos %d): %s%n" , startPosition , conversationTokens );
391390
392391 ChatFormat chatFormat = new ChatFormat (model .tokenizer ());
393392 chatMessages .stream ().map (m -> String .format ("[%s]> %s" , m .role (), m .content ())).forEach (System .out ::println );
@@ -1491,10 +1490,10 @@ private static Vocabulary loadVocabulary(Map<String, Object> metadata) {
14911490 public static Llama loadModel (Path ggufPath , int contextLength , boolean loadWeights ) throws IOException {
14921491 GGUF gguf = GGUF .loadModel (ggufPath );
14931492 FileChannel fileChannel = FileChannel .open (ggufPath , StandardOpenOption .READ );
1494- return loadModel (fileChannel , gguf , contextLength , loadWeights );
1493+ return loadModel (ggufPath , fileChannel , gguf , contextLength , loadWeights );
14951494 }
14961495
1497- public static Llama loadModel (FileChannel fileChannel , GGUF gguf , int contextLength , boolean loadWeights ) throws IOException {
1496+ public static Llama loadModel (Path ggufPath , FileChannel fileChannel , GGUF gguf , int contextLength , boolean loadWeights ) throws IOException {
14981497 try (var ignored = Timer .log ("Load LlaMa model" )) {
14991498 Map <String , Object > metadata = gguf .getMetadata ();
15001499 Vocabulary vocabulary = loadVocabulary (metadata );
@@ -1521,7 +1520,7 @@ public static Llama loadModel(FileChannel fileChannel, GGUF gguf, int contextLen
15211520 Map <String , GGMLTensorEntry > tensorEntries = GGUF .loadTensors (fileChannel , gguf .getTensorDataOffset (), gguf .getTensorInfos ());
15221521 weights = loadWeights (tensorEntries , config );
15231522 }
1524- return new Llama (ggufPath .getFileName ().toString ().replaceFirst ("[.]gguf$" , "" ), config , tokenizer , qw );
1523+ return new Llama (ggufPath .getFileName ().toString ().replaceFirst ("[.]gguf$" , "" ), config , tokenizer , weights );
15251524 }
15261525 }
15271526
@@ -3103,7 +3102,7 @@ private static PartialModel preLoadGGUF(String modelPath) {
31033102 try (FileChannel fileChannel = FileChannel .open (path , StandardOpenOption .READ )) {
31043103 return new PartialModel (
31053104 path .getFileName ().toString (),
3106- ModelLoader .loadModel (fileChannel , gguf , Llama3 .Options .DEFAULT_MAX_TOKENS , false ),
3105+ ModelLoader .loadModel (path , fileChannel , gguf , Llama3 .Options .DEFAULT_MAX_TOKENS , false ),
31073106 gguf .getTensorDataOffset (),
31083107 gguf .getTensorInfos ()
31093108 );
@@ -3135,7 +3134,8 @@ public static Llama tryUsePreLoaded(Path modelPath, int contextLength) throws IO
31353134 // Load only the tensors (mmap slices).
31363135 Map <String , GGMLTensorEntry > tensorEntries = GGUF .loadTensors (fileChannel , preLoaded .tensorDataOffset (), preLoaded .tensorInfos ());
31373136 Llama .Weights weights = ModelLoader .loadWeights (tensorEntries , baseModel .configuration ());
3138- return new Llama (baseModel .configuration ().withContextLength (contextLength ), baseModel .tokenizer (), weights );
3137+ return new Llama (modelPath .getFileName ().toString ().replaceFirst (".gguf$" , "" ),
3138+ baseModel .configuration ().withContextLength (contextLength ), baseModel .tokenizer (), weights );
31393139 }
31403140 }
31413141}
0 commit comments