Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ CDP_API_KEY_PRIVATE_KEY=your_cdp_api_key_private_key
# Hyperbolic (Required)
HYPERBOLIC_API_KEY=your_hyperbolic_api_key

# RSA key file path (Required)
SSH_PRIVATE_KEY_PATH=~/.ssh/id_rsa

# LangChain (Required for Finetune tool)
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="your_langchain_endpoint"
LANGCHAIN_API_KEY="your_langchain_api_key"
Expand Down Expand Up @@ -51,5 +53,4 @@ USE_DEPLOY_MULTITOKEN=true
USE_WEB_SEARCH=true
USE_REQUEST_TOOLS=true


ELEVEN_API_KEY=your_eleven_api_key
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ id_rsa.pub

# Project specific
wallet_data.txt
twitter_state_default.db
.replit
.python-version

Expand All @@ -69,3 +70,7 @@ videofiles/

jsonoutputs/

# Finetuning
finetuned_model
remote_file_sync.py
finetune_example/training_data2.jsonl
2 changes: 1 addition & 1 deletion chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ async def initialize_agent():
print_error(f"Error initializing GitHub tools: {str(e)}")
print_error("GitHub tools will not be available")



# Create the runnable config with increased recursion limit
runnable_config = RunnableConfig(recursion_limit=200)
Expand Down
69 changes: 69 additions & 0 deletions finetune_example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Quickstart
Demo video: https://www.loom.com/share/13dfa667db9f496188df284cb15c392b?sid=397e07f7-fb69-472e-8f93-29abea759ce8

* It is recommended to rent a GPU from the Hyperbolic web app first to simplify the agent flow, but this is optional.

## Setup
0. Make sure all the installation steps in the main README are completed.
1. First, set up the following REQUIRED env variables:
```
ANTHROPIC_API_KEY=
CDP_API_KEY_NAME=
CDP_API_KEY_PRIVATE_KEY=

HYPERBOLIC_API_KEY=

SSH_PRIVATE_KEY_PATH=/path/to/your/.ssh/id_rsa

LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY=
LANGCHAIN_PROJECT=
```

2. Boot up the chatbot gradio interface:
```
poetry run python gradio_ui.py
```

3. Type in the following prompt in the chatbot interface:
```
Run a fine tuning task using Mistral 7b using get_gpu_status first
```

4. Check your console logs and also SSH into your remote GPU instance to track the progress of the fine tuning task.
```
ssh ubuntu@<your-instance-ip> -p XXXXX
cd finetune_example
ls
```

5. Once the fine tuning task is complete, you will see a "success" message in the chatbot interface.

6. You can now use the fine-tuned model for inference on your remote GPU instance by running:
```
source venv/bin/activate
python3 test_inference.py "Your prompt here"
```

7. You can also edit the finetune.py script or training_data.jsonl file by running:
```
nano finetune.py
nano training_data.jsonl
```

8. You can start another fine-tuning task with your updated parameters (or a different base model) by running:
```
export FINE_TUNE_MODEL="unsloth/mistral-7b-v0.3-bnb-4bit"
python3 finetune.py
```

or you can just ask the Hyperbolic agent through the chat interface to do it for you.

9. You can delete the finetuned model in your remote GPU instance by running:
```
rm -rf finetuned_model
```

* Support for syncing the finetuned model back to your local machine is coming soon.

127 changes: 127 additions & 0 deletions finetune_example/finetune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import os
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset
from datetime import datetime

def log_progress(msg):
    """Print *msg* to stdout, prefixed with the current HH:MM:SS timestamp."""
    print(f"[{datetime.now():%H:%M:%S}] 🐰 {msg}")

def format_chat(example):
    """Render example["messages"] into a single Mistral-style training string.

    User and system turns are wrapped in ``<s>[INST] ... [/INST]``; assistant
    turns are terminated with ``</s>``. The rendered string is stored under
    example["text"] and the (mutated) example is returned.
    """
    parts = []
    for message in example["messages"]:
        role, content = message["role"], message["content"]
        if role == "assistant":
            parts.append(f"{content}</s>")
        elif role == "user":
            parts.append(f"<s>[INST] {content} [/INST]")
        elif role == "system":
            # System prompts are moved to the front of the rendered sequence.
            parts.insert(0, f"<s>[INST] {content} [/INST]")
    example["text"] = " ".join(parts)
    return example

def fine_tune():
    """Fine-tune 4-bit Mistral-7B with LoRA adapters on training_data.jsonl.

    Loads the JSONL training split, loads the quantized base model through
    unsloth, attaches LoRA adapters, trains for three epochs with mixed
    precision, then writes the model and tokenizer to ./finetuned_model
    (plus a best-effort q8_0 GGUF export).

    Returns:
        The stats object produced by ``SFTTrainer.train()``.
    """
    log_progress("🚀 Starting fine-tuning process...")

    # Training corpus: one chat example per JSONL line.
    # NOTE(review): format_chat is defined in this file but never mapped over
    # the dataset here — confirm SFTTrainer sees the intended text field.
    train_split = load_dataset("json", data_files={"train": "training_data.jsonl"})["train"]

    # Model-loading knobs.
    context_length = 4096   # increased for longer context
    auto_dtype = None       # let unsloth auto-detect the optimal dtype
    four_bit = True

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/mistral-7b-v0.3-bnb-4bit",
        max_seq_length=context_length,
        dtype=auto_dtype,
        load_in_4bit=four_bit
    )

    # Mistral [INST] chat template; system turns are rendered like user turns.
    tokenizer.chat_template = """{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '<s>[INST] ' + message['content'] + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + '</s>' }}
{% elif message['role'] == 'system' %}
{{ '<s>[INST] ' + message['content'] + ' [/INST]' }}
{% endif %}
{% endfor %}"""

    # Attach LoRA adapters to the attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,                        # optimized setting
        bias="none",                           # optimized setting
        use_gradient_checkpointing="unsloth",  # uses ~30% less VRAM
        random_state=3407,
        use_rslora=False,
        loftq_config=None
    )

    # Training hyperparameters; precision is chosen from hardware support.
    hyperparams = TrainingArguments(
        output_dir="./finetuned_model",
        num_train_epochs=3,
        per_device_train_batch_size=1,  # reduced for longer sequences
        gradient_accumulation_steps=4,
        warmup_steps=5,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        torch_compile=False,  # disabled for stability
        seed=3407
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_split,
        args=hyperparams,
        tokenizer=tokenizer,
        max_seq_length=context_length,
        dataset_num_proc=2,
        packing=False
    )

    log_progress("🏃 Training model...")
    stats = trainer.train()

    log_progress("💾 Saving fine-tuned model...")
    save_dir = "./finetuned_model"
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)

    # GGUF export is best-effort; a failure here is non-fatal.
    try:
        model.save_pretrained_gguf(save_dir, tokenizer, quantization_method=["q8_0"])
    except Exception as e:
        print(f"Note: GGUF export failed (this is optional): {e}")

    return stats

if __name__ == "__main__":
    # Entry point: the trainer already logs its own stats, so the return
    # value is intentionally discarded (the original bound it to an unused
    # local `result`).
    fine_tune()
14 changes: 14 additions & 0 deletions finetune_example/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
unsloth==2025.2.4
unsloth-zoo==2025.2.3
transformers==4.48.3
trl==0.14.0
xformers==0.0.28.post3
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu118
torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cu118
accelerate
bitsandbytes
datasets
peft
ninja
numpy<2.0
vllm==0.7.2
75 changes: 75 additions & 0 deletions finetune_example/test_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import json
import os
import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_inference(prompt):
    """Generate a completion for *prompt* with the fine-tuned model.

    Loads the checkpoint saved under ./finetuned_model, wraps the prompt in
    the Mistral ``[INST]`` template, samples up to 512 new tokens, writes the
    result to inference_output.json, and prints it.

    Args:
        prompt: Plain-text user prompt.

    Returns:
        dict with "prompt" and "response" keys.

    Raises:
        RuntimeError: if CUDA is unavailable.
        Exception: any model-loading/generation failure is printed and
            re-raised.
    """
    # Ensure CUDA is available and initialized
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available")

    # Force CUDA initialization
    torch.cuda.init()

    # Print CUDA info for debugging
    print(f"CUDA Device: {torch.cuda.get_device_name()}")
    print(f"CUDA Version: {torch.version.cuda}")

    # Always use the finetuned model directory for inference
    model_dir = os.path.abspath("./finetuned_model")

    try:
        # Load the fine-tuned model and tokenizer
        model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        # Wrap the prompt in the Mistral instruction template
        test_prompt = f"<s>[INST] {prompt} [/INST]"
        inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                do_sample=True
            )

        # BUG FIX: generate() returns prompt + completion tokens; decode only
        # the newly generated part so "response" does not echo the
        # [INST]-wrapped prompt back to the caller.
        prompt_length = inputs["input_ids"].shape[1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # Format output
        output = {
            "prompt": prompt,
            "response": generated_text
        }

        # Save and print results
        with open("inference_output.json", "w") as f:
            json.dump(output, f, indent=2)

        print("\n=== Test Inference Results ===")
        print(f"Prompt: {output['prompt']}")
        print(f"Response: {output['response']}")
        print("============================\n")

        return output

    except Exception as e:
        print(f"Error during inference: {str(e)}")
        raise

if __name__ == "__main__":
    # Require exactly one CLI argument: the prompt to feed the model.
    if len(sys.argv) < 2:
        print("Usage: python test_inference.py \"your prompt here\"")
        sys.exit(1)

    run_inference(sys.argv[1])
Loading