Skip to content

Commit 8e13cf7

Browse files
committed
Fixes after rebase.
1 parent c61567b commit 8e13cf7

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

Llama3.java

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -387,7 +387,6 @@ else if ("user".equals(role)) {
387387
final Llama3.Options options = httpSession.options();
388388
final List<Integer> conversationTokens = httpSession.conversationTokens();
389389
int startPosition = conversationTokens.size();
390-
System.out.format("Tokens1 (start-pos %d): %s%n", startPosition, conversationTokens);
391390

392391
ChatFormat chatFormat = new ChatFormat(model.tokenizer());
393392
chatMessages.stream().map(m -> String.format("[%s]> %s", m.role(), m.content())).forEach(System.out::println);
@@ -1491,10 +1490,10 @@ private static Vocabulary loadVocabulary(Map<String, Object> metadata) {
14911490
public static Llama loadModel(Path ggufPath, int contextLength, boolean loadWeights) throws IOException {
14921491
GGUF gguf = GGUF.loadModel(ggufPath);
14931492
FileChannel fileChannel = FileChannel.open(ggufPath, StandardOpenOption.READ);
1494-
return loadModel(fileChannel, gguf, contextLength, loadWeights);
1493+
return loadModel(ggufPath, fileChannel, gguf, contextLength, loadWeights);
14951494
}
14961495

1497-
public static Llama loadModel(FileChannel fileChannel, GGUF gguf, int contextLength, boolean loadWeights) throws IOException {
1496+
public static Llama loadModel(Path ggufPath, FileChannel fileChannel, GGUF gguf, int contextLength, boolean loadWeights) throws IOException {
14981497
try (var ignored = Timer.log("Load LlaMa model")) {
14991498
Map<String, Object> metadata = gguf.getMetadata();
15001499
Vocabulary vocabulary = loadVocabulary(metadata);
@@ -1521,7 +1520,7 @@ public static Llama loadModel(FileChannel fileChannel, GGUF gguf, int contextLen
15211520
Map<String, GGMLTensorEntry> tensorEntries = GGUF.loadTensors(fileChannel, gguf.getTensorDataOffset(), gguf.getTensorInfos());
15221521
weights = loadWeights(tensorEntries, config);
15231522
}
1524-
return new Llama(ggufPath.getFileName().toString().replaceFirst("[.]gguf$", ""), config, tokenizer, qw);
1523+
return new Llama(ggufPath.getFileName().toString().replaceFirst("[.]gguf$", ""), config, tokenizer, weights);
15251524
}
15261525
}
15271526

@@ -3103,7 +3102,7 @@ private static PartialModel preLoadGGUF(String modelPath) {
31033102
try (FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.READ)) {
31043103
return new PartialModel(
31053104
path.getFileName().toString(),
3106-
ModelLoader.loadModel(fileChannel, gguf, Llama3.Options.DEFAULT_MAX_TOKENS, false),
3105+
ModelLoader.loadModel(path, fileChannel, gguf, Llama3.Options.DEFAULT_MAX_TOKENS, false),
31073106
gguf.getTensorDataOffset(),
31083107
gguf.getTensorInfos()
31093108
);
@@ -3135,7 +3134,8 @@ public static Llama tryUsePreLoaded(Path modelPath, int contextLength) throws IO
31353134
// Load only the tensors (mmap slices).
31363135
Map<String, GGMLTensorEntry> tensorEntries = GGUF.loadTensors(fileChannel, preLoaded.tensorDataOffset(), preLoaded.tensorInfos());
31373136
Llama.Weights weights = ModelLoader.loadWeights(tensorEntries, baseModel.configuration());
3138-
return new Llama(baseModel.configuration().withContextLength(contextLength), baseModel.tokenizer(), weights);
3137+
return new Llama(modelPath.getFileName().toString().replaceFirst(".gguf$", ""),
3138+
baseModel.configuration().withContextLength(contextLength), baseModel.tokenizer(), weights);
31393139
}
31403140
}
31413141
}

0 commit comments

Comments
 (0)