From e040c634f1dbac27e8e238e7b4e6473e8bdbb91a Mon Sep 17 00:00:00 2001
From: Jingnan Zhou
Date: Wed, 28 Feb 2024 21:55:24 -0500
Subject: [PATCH] fix issues #659 and #608

---
 guidance/models/llama_cpp/_llama_cpp.py     | 10 ++++-----
 notebooks/tutorials/intro_to_guidance.ipynb | 24 +++++++++++++++------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/guidance/models/llama_cpp/_llama_cpp.py b/guidance/models/llama_cpp/_llama_cpp.py
index e25b11481..66310c238 100644
--- a/guidance/models/llama_cpp/_llama_cpp.py
+++ b/guidance/models/llama_cpp/_llama_cpp.py
@@ -21,7 +21,7 @@ class _LlamaBatchContext:
     def __init__(self, n_batch, n_ctx):
         self._llama_batch_free = llama_cpp.llama_batch_free
-        self.batch = llama_cpp.llama_batch_init(n_tokens=n_batch, embd=0, n_seq_max=n_ctx)
+        self.batch = llama_cpp.llama_batch_init(n_batch, 0, n_ctx)
         if self.batch is None:
             raise Exception("call to llama_cpp.llama_batch_init returned NULL.")
 
@@ -41,16 +41,16 @@ def __init__(self, model_obj):
 
         # get the bytes strings for all the tokens
         tokens = []
-        for i in range(tokenizer.llama.n_vocab()):
-            tok = tokenizer.llama.detokenize([i]) # note that detokenize returns bytes directly
+        for i in range(tokenizer._model.n_vocab()):
+            tok = tokenizer._model.detokenize([i]) # note that detokenize returns bytes directly
             if tok == b'':
                 tok = llama_cpp.llama_token_get_text(model_obj.model, i) # get text rep of special tokens
             tokens.append(tok)
 
         super().__init__(
             tokens,
-            tokenizer.llama.token_bos(),
-            tokenizer.llama.token_eos()
+            tokenizer._model.token_bos(),
+            tokenizer._model.token_eos()
         )
 
     def __call__(self, byte_string):
diff --git a/notebooks/tutorials/intro_to_guidance.ipynb b/notebooks/tutorials/intro_to_guidance.ipynb
index 4809b531a..15f7b9c2f 100644
--- a/notebooks/tutorials/intro_to_guidance.ipynb
+++ b/notebooks/tutorials/intro_to_guidance.ipynb
@@ -21,16 +21,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'guidance'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_56968/3823389102.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mguidance\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# For LlamaCpp, you need to provide the path on disk to a .gguf model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# A sample model can be downloaded from\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/blob/main/mistral-7b-instruct-v0.2.Q8_0.gguf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'guidance'"
+     ]
+    }
+   ],
    "source": [
     "from guidance import models\n",
     "\n",
     "# For LlamaCpp, you need to provide the path on disk to a .gguf model\n",
     "# A sample model can be downloaded from\n",
     "# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/blob/main/mistral-7b-instruct-v0.2.Q8_0.gguf\n",
-    "mistral = models.LlamaCpp(\"/home/scottlundberg_google_com/models/mistral-7b-instruct-v0.2.Q8_0.gguf\", n_gpu_layers=-1, n_ctx=4096)\n",
+    "mistral = models.LlamaCpp(\"/jingnan/lmaas/downloads/gemma-2b-it-q4_k_m.gguf\", n_gpu_layers=-1, n_ctx=4096)\n",
     "\n",
     "#llama2 = models.Transformers(\"meta-llama/Llama-2-7b-hf\")\n",
     "#gpt3 = models.OpenAI(\"text-davinci-003\")\n",
@@ -612,7 +624,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.10.12 64-bit",
    "language": "python",
    "name": "python3"
   },
@@ -626,11 +638,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {
-    "hash": "fd8cabafb56980528edb83a46206c404687fdaed0bdad7c450ae020143ae38bc"
+    "hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"
    }
   }
  },
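
Note for reviewers, not part of the commit itself: both code changes track
current llama-cpp-python behavior. The tokenizer returned by Llama.tokenizer()
keeps the low-level model handle in _model (the previous llama attribute is no
longer available), and llama_batch_init is called positionally so the code no
longer depends on the binding's keyword names. The sketch below mirrors what
the patched code does; it is illustrative only, assumes a llama-cpp-python
build contemporary with this patch, and the .gguf path is a placeholder.

    import llama_cpp

    # Load any gguf model; the path here is a placeholder, not part of the patch.
    model_obj = llama_cpp.Llama(model_path="path/to/model.gguf", n_ctx=512)
    tokenizer = model_obj.tokenizer()  # low-level model handle is tokenizer._model

    # Vocabulary extraction, as in LlamaCppTokenizer.__init__ after this patch.
    tokens = []
    for i in range(tokenizer._model.n_vocab()):
        tok = tokenizer._model.detokenize([i])  # bytes for token i
        if tok == b'':
            # fall back to the text representation for special tokens
            tok = llama_cpp.llama_token_get_text(model_obj.model, i)
        tokens.append(tok)

    # Batch allocation, as in _LlamaBatchContext.__init__ after this patch:
    # positional (n_tokens, embd, n_seq_max) rather than keyword arguments.
    batch = llama_cpp.llama_batch_init(64, 0, model_obj.n_ctx())
    llama_cpp.llama_batch_free(batch)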