Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ CDP_API_KEY_PRIVATE_KEY=your_cdp_api_key_private_key
# Hyperbolic (Required)
HYPERBOLIC_API_KEY=your_hyperbolic_api_key

# RSA key file path (Required)
SSH_PRIVATE_KEY_PATH=~/.ssh/id_rsa

# LangChain (Required for Finetune tool)
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="your_langchain_endpoint"
LANGCHAIN_API_KEY="your_langchain_api_key"
Expand Down Expand Up @@ -51,5 +53,4 @@ USE_DEPLOY_MULTITOKEN=true
USE_WEB_SEARCH=true
USE_REQUEST_TOOLS=true


ELEVEN_API_KEY=your_eleven_api_key
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ id_rsa.pub

# Project specific
wallet_data.txt
twitter_state_default.db
.replit
.python-version

Expand All @@ -69,3 +70,7 @@ videofiles/

jsonoutputs/

# Finetuning
finetuned_model
remote_file_sync.py
finetune_example/training_data2.jsonl
2 changes: 1 addition & 1 deletion chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ async def initialize_agent():
print_error(f"Error initializing GitHub tools: {str(e)}")
print_error("GitHub tools will not be available")



# Create the runnable config with increased recursion limit
runnable_config = RunnableConfig(recursion_limit=200)
Expand Down
69 changes: 69 additions & 0 deletions finetune_example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Quickstart
Demo video: https://www.loom.com/share/13dfa667db9f496188df284cb15c392b?sid=397e07f7-fb69-472e-8f93-29abea759ce8

* It is recommended to rent a GPU from the Hyperbolic web app first to simplify the agent flow, but this is optional.

## Setup
0. Make sure all the installation steps in the main README are completed.
1. First, set up the following REQUIRED env variables:
```
ANTHROPIC_API_KEY=
CDP_API_KEY_NAME=
CDP_API_KEY_PRIVATE_KEY=

HYPERBOLIC_API_KEY=

SSH_PRIVATE_KEY_PATH=/path/to/your/.ssh/id_rsa

LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY=
LANGCHAIN_PROJECT=
```

2. Boot up the chatbot gradio interface:
```
poetry run python gradio_ui.py
```

3. Type in the following prompt in the chatbot interface:
```
Run a fine tuning task using Mistral 7b using get_gpu_status first
```

4. Check your console logs and also SSH into your remote GPU instance to track the progress of the fine tuning task.
```
ssh ubuntu@<your-instance-ip> -p XXXXX
cd finetune_example
ls
```

5. Once the fine tuning task is complete, you will see a "success" message in the chatbot interface.

6. You can now use the fine-tuned model for inference on your remote GPU instance by running:
```
source venv/bin/activate
python3 test_inference.py "Your prompt here"
```

7. You can also edit the finetune.py script or training_data.jsonl file by running:
```
nano finetune.py
nano training_data.jsonl
```

8. You can start another fine-tuning task with your updated parameters (or a different base model) by running:
```
export FINE_TUNE_MODEL="unsloth/mistral-7b-v0.3-bnb-4bit"
python3 finetune.py
```

or you can just ask the Hyperbolic agent through the chat interface to do it for you.

9. You can delete the finetuned model in your remote GPU instance by running:
```
rm -rf finetuned_model
```

* Support for syncing the finetuned model back to your local machine is coming soon.

127 changes: 127 additions & 0 deletions finetune_example/finetune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import os
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset
from datetime import datetime

def log_progress(msg):
    """Print *msg* to stdout, prefixed with the current HH:MM:SS timestamp."""
    print(f"[{datetime.now():%H:%M:%S}] 🐰 {msg}")

def format_chat(example):
    """Render example["messages"] into a single Mistral-style training string.

    User and system turns are wrapped in ``<s>[INST] ... [/INST]``; assistant
    turns are terminated with ``</s>``. The rendered string is stored under
    example["text"] and the (mutated) example is returned.
    """
    parts = []
    for message in example["messages"]:
        role, content = message["role"], message["content"]
        if role == "assistant":
            parts.append(f"{content}</s>")
        elif role == "user":
            parts.append(f"<s>[INST] {content} [/INST]")
        elif role == "system":
            # System prompts are moved to the front of the rendered sequence.
            parts.insert(0, f"<s>[INST] {content} [/INST]")
    example["text"] = " ".join(parts)
    return example

def fine_tune():
    """Fine-tune 4-bit Mistral-7B with LoRA adapters on training_data.jsonl.

    Loads the JSONL training split, loads the quantized base model through
    unsloth, attaches LoRA adapters, trains for three epochs with mixed
    precision, then writes the model and tokenizer to ./finetuned_model
    (plus a best-effort q8_0 GGUF export).

    Returns:
        The stats object produced by ``SFTTrainer.train()``.
    """
    log_progress("🚀 Starting fine-tuning process...")

    # Training corpus: one chat example per JSONL line.
    # NOTE(review): format_chat is defined in this file but never mapped over
    # the dataset here — confirm SFTTrainer sees the intended text field.
    train_split = load_dataset("json", data_files={"train": "training_data.jsonl"})["train"]

    # Model-loading knobs.
    context_length = 4096   # increased for longer context
    auto_dtype = None       # let unsloth auto-detect the optimal dtype
    four_bit = True

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/mistral-7b-v0.3-bnb-4bit",
        max_seq_length=context_length,
        dtype=auto_dtype,
        load_in_4bit=four_bit
    )

    # Mistral [INST] chat template; system turns are rendered like user turns.
    tokenizer.chat_template = """{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '<s>[INST] ' + message['content'] + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + '</s>' }}
{% elif message['role'] == 'system' %}
{{ '<s>[INST] ' + message['content'] + ' [/INST]' }}
{% endif %}
{% endfor %}"""

    # Attach LoRA adapters to the attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,                        # optimized setting
        bias="none",                           # optimized setting
        use_gradient_checkpointing="unsloth",  # uses ~30% less VRAM
        random_state=3407,
        use_rslora=False,
        loftq_config=None
    )

    # Training hyperparameters; precision is chosen from hardware support.
    hyperparams = TrainingArguments(
        output_dir="./finetuned_model",
        num_train_epochs=3,
        per_device_train_batch_size=1,  # reduced for longer sequences
        gradient_accumulation_steps=4,
        warmup_steps=5,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        torch_compile=False,  # disabled for stability
        seed=3407
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_split,
        args=hyperparams,
        tokenizer=tokenizer,
        max_seq_length=context_length,
        dataset_num_proc=2,
        packing=False
    )

    log_progress("🏃 Training model...")
    stats = trainer.train()

    log_progress("💾 Saving fine-tuned model...")
    save_dir = "./finetuned_model"
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)

    # GGUF export is best-effort; a failure here is non-fatal.
    try:
        model.save_pretrained_gguf(save_dir, tokenizer, quantization_method=["q8_0"])
    except Exception as e:
        print(f"Note: GGUF export failed (this is optional): {e}")

    return stats

if __name__ == "__main__":
    # Entry point: the trainer already logs its own stats, so the return
    # value is intentionally discarded (the original bound it to an unused
    # local `result`).
    fine_tune()
14 changes: 14 additions & 0 deletions finetune_example/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
unsloth==2025.2.4
unsloth-zoo==2025.2.3
transformers==4.48.3
trl==0.14.0
xformers==0.0.28.post3
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu118
torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cu118
accelerate
bitsandbytes
datasets
peft
ninja
numpy<2.0
vllm==0.7.2
75 changes: 75 additions & 0 deletions finetune_example/test_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import json
import os
import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_inference(prompt):
    """Generate a completion for *prompt* with the fine-tuned model.

    Loads the checkpoint saved under ./finetuned_model, wraps the prompt in
    the Mistral ``[INST]`` template, samples up to 512 new tokens, writes the
    result to inference_output.json, and prints it.

    Args:
        prompt: Plain-text user prompt.

    Returns:
        dict with "prompt" and "response" keys.

    Raises:
        RuntimeError: if CUDA is unavailable.
        Exception: any model-loading/generation failure is printed and
            re-raised.
    """
    # Ensure CUDA is available and initialized
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available")

    # Force CUDA initialization
    torch.cuda.init()

    # Print CUDA info for debugging
    print(f"CUDA Device: {torch.cuda.get_device_name()}")
    print(f"CUDA Version: {torch.version.cuda}")

    # Always use the finetuned model directory for inference
    model_dir = os.path.abspath("./finetuned_model")

    try:
        # Load the fine-tuned model and tokenizer
        model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        # Wrap the prompt in the Mistral instruction template
        test_prompt = f"<s>[INST] {prompt} [/INST]"
        inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                do_sample=True
            )

        # BUG FIX: generate() returns prompt + completion tokens; decode only
        # the newly generated part so "response" does not echo the
        # [INST]-wrapped prompt back to the caller.
        prompt_length = inputs["input_ids"].shape[1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # Format output
        output = {
            "prompt": prompt,
            "response": generated_text
        }

        # Save and print results
        with open("inference_output.json", "w") as f:
            json.dump(output, f, indent=2)

        print("\n=== Test Inference Results ===")
        print(f"Prompt: {output['prompt']}")
        print(f"Response: {output['response']}")
        print("============================\n")

        return output

    except Exception as e:
        print(f"Error during inference: {str(e)}")
        raise

if __name__ == "__main__":
    # Require exactly one CLI argument: the prompt to feed the model.
    if len(sys.argv) < 2:
        print("Usage: python test_inference.py \"your prompt here\"")
        sys.exit(1)

    run_inference(sys.argv[1])
Loading