From 8e985b5626edf7db8179511a02aa5eb8c652b0e7 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Wed, 12 Feb 2025 12:49:50 -0500 Subject: [PATCH 01/10] first run_finetune tool call success --- .env.example | 1 + .gitignore | 3 + chatbot.py | 6 +- gradio_ui.py | 7 +- .../actions/run_remote_finetune.py | 298 ++++++++++++++++++ 5 files changed, 311 insertions(+), 4 deletions(-) create mode 100644 hyperbolic_agentkit_core/actions/run_remote_finetune.py diff --git a/.env.example b/.env.example index cd558170..8d80c67a 100644 --- a/.env.example +++ b/.env.example @@ -50,6 +50,7 @@ USE_RETWEET=true USE_DEPLOY_MULTITOKEN=true USE_WEB_SEARCH=true USE_REQUEST_TOOLS=true +USE_FINETUNE_TOOLS=true ELEVEN_API_KEY=your_eleven_api_key \ No newline at end of file diff --git a/.gitignore b/.gitignore index c75cdefd..222f9d6f 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ id_rsa.pub # Project specific wallet_data.txt +twitter_state_default.db .replit .python-version @@ -69,3 +70,5 @@ videofiles/ jsonoutputs/ +finetune_example +remote_file_sync.py \ No newline at end of file diff --git a/chatbot.py b/chatbot.py index 215e93d6..09084cb9 100644 --- a/chatbot.py +++ b/chatbot.py @@ -72,6 +72,8 @@ from langchain_core.runnables import RunnableConfig from podcast_agent.podcast_knowledge_base import PodcastKnowledgeBase +from hyperbolic_agentkit_core.actions.run_remote_finetune import RunFinetuneAction + async def generate_llm_podcast_query(llm: ChatAnthropic = None) -> str: """ Generates a dynamic, contextually-aware query for the podcast knowledge base using an LLM. @@ -894,7 +896,9 @@ async def initialize_agent(): print_error(f"Error initializing GitHub tools: {str(e)}") print_error("GitHub tools will not be available") - + # Add fine-tuning tool + if os.getenv("USE_FINETUNE_TOOLS", "false").lower() == "true": + tools.append(RunFinetuneAction()) # Create the runnable config with increased recursion limit runnable_config = RunnableConfig(recursion_limit=200) diff --git a/gradio_ui.py b/gradio_ui.py index bab3e2d2..886a9d45 100644 --- a/gradio_ui.py +++ b/gradio_ui.py @@ -10,14 +10,15 @@ async def chat_with_agent(message, history): # Initialize agent if not already done if not hasattr(chat_with_agent, "agent"): - agent_executor, config, twitter_api_wrapper, knowledge_base = await initialize_agent() + agent_executor, config, runnable_config, twitter_api_wrapper, knowledge_base, podcast_knowledge_base = await initialize_agent() chat_with_agent.agent = agent_executor chat_with_agent.config = config + chat_with_agent.runnable_config = runnable_config # Store runnable_config as well runnable_config = RunnableConfig( - recursion_limit=config["configurable"]["recursion_limit"], + recursion_limit=chat_with_agent.config["configurable"]["recursion_limit"], configurable={ - "thread_id": config["configurable"]["thread_id"], + "thread_id": chat_with_agent.config["configurable"]["thread_id"], "checkpoint_ns": "chat_mode", "checkpoint_id": str(datetime.now().timestamp()) } diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py new file mode 100644 index 00000000..632deb51 --- /dev/null +++ b/hyperbolic_agentkit_core/actions/run_remote_finetune.py @@ -0,0 +1,298 @@ +import requests +import json +import os +import subprocess +from typing import Optional, Dict, Any +from collections.abc import Callable +from pydantic import BaseModel, Field +from langchain.tools import BaseTool +from langchain.callbacks.manager import CallbackManagerForToolRun + +from hyperbolic_agentkit_core.actions.hyperbolic_action import HyperbolicAction + +from hyperbolic_agentkit_core.actions.ssh_manager import ssh_manager +from hyperbolic_agentkit_core.actions.get_gpu_status import get_gpu_status + + +class RunFinetuneInput(BaseModel): + """Input argument schema for fine-tuning action.""" + model_name: str = Field( + ..., + description="The name of the base model to fine-tune" + ) + + +class RunFinetuneAction(BaseTool): + name: str = "run_finetune" + description: str = """This tool will execute fine-tuning of an AI model on Hyperbolic's GPU infrastructure. + It takes the model name as input (e.g., "unsloth/mistral-7b-v0.3-bnb-4bit"). + The model will be fine-tuned using unsloth and set up for local inference using vLLM. + Training data should be prepared in advance in the data/training_data.jsonl file.""" + args_schema: type[BaseModel] = RunFinetuneInput + return_direct: bool = False + + def _run( + self, + model_name: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + try: + # Step 0: Get GPU status + gpu_status = get_gpu_status() + if not gpu_status.get("instances"): + return json.dumps({"status": "error", "message": "No active GPU instances found"}) + + instance = gpu_status["instances"][0] + ssh_command = instance["sshCommand"] + + # Parse SSH command + parts = ssh_command.split() + host = parts[1].split('@')[1] + port = int(parts[3]) + username = parts[1].split('@')[0] + + # Step 1: Establish SSH connection first + ssh_result = ssh_manager.connect(host=host, port=port, username=username) + if isinstance(ssh_result, str) and "Error" in ssh_result: + return json.dumps({"status": "error", "message": f"Failed to connect to remote GPU: {ssh_result}"}) + + # Step 2: Setup remote environment + # Install rsync + shell_result = ssh_manager.execute("sudo apt-get update && sudo apt-get install -y rsync") + if isinstance(shell_result, str) and "error" in shell_result.lower(): + return json.dumps({"status": "error", "message": f"Failed to install rsync: {shell_result}"}) + + + # Step 3: Sync files to remote GPU + sync_result = sync_to_remote() + if isinstance(sync_result, dict) and not sync_result.get("success", False): + return json.dumps({"status": "error", "message": sync_result.get("error", "Sync failed")}) + + # Step 4: Execute setup and training commands in a single shell session + combined_command = " && ".join([ + "python3 -m venv venv", + "source venv/bin/activate", + "pip install -r requirements.txt", + f"FINE_TUNE_MODEL={model_name} python3 finetune.py" + ]) + + shell_result = ssh_manager.execute(combined_command) + if isinstance(shell_result, str) and "error" in shell_result.lower(): + return json.dumps({ + "status": "error", + "message": f"Command failed: {shell_result}" + }) + + return json.dumps({ + "status": "success", + "message": "Fine-tuning completed successfully", + "model_name": model_name + }) + + except Exception as e: + return json.dumps({ + "status": "error", + "message": str(e) + }) + + async def _arun( + self, + model_name: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Async implementation of the tool.""" + return self._run(model_name) + +def parse_metrics(output: str) -> Dict[str, Any]: + """Parse training metrics from command output.""" + metrics = {} + try: + # Look for lines containing metrics in the format "metric: value" + for line in output.split('\n'): + if ':' in line: + key, value = line.split(':', 1) + key = key.strip() + value = value.strip() + try: + # Try to convert to float if possible + value = float(value) + except: + pass + metrics[key] = value + except Exception as e: + metrics["parse_error"] = str(e) + return metrics + + + +def setup_vllm_inference(model_name: str) -> Dict: + """Sets up vLLM for inference with the fine-tuned model.""" + try: + ssh_string = os.getenv("HYPERBOLIC_SSH_STRING") + if not ssh_string: + raise ValueError("HYPERBOLIC_SSH_STRING not found in environment") + + # Install vLLM and setup inference + setup_cmd = [ + "ssh", ssh_string, + f"cd /home/ubuntu/finetune_example && " + f"source venv/bin/activate && " + f"pip install vllm && " + f"python3 -c '" + f"from vllm import LLM; " + f"llm = LLM(model=\"/home/ubuntu/finetune_example/finetuned_model\"); " + f"print(\"vLLM setup complete\")'" + ] + + result = subprocess.run(setup_cmd, capture_output=True, text=True, check=True) + + return { + "success": True, + "status": "vLLM inference setup complete" + } + + except Exception as e: + return {"success": False, "error": str(e)} + +def sync_from_remote() -> Dict[str, bool]: + """Syncs fine-tuned model from remote GPU.""" + try: + # Get GPU status and SSH details + gpu_status = get_gpu_status() + if not gpu_status.get("instances"): + return {"success": False, "error": "No active GPU instances found"} + + instance = gpu_status["instances"][0] + ssh_command = instance["sshCommand"] + + # Parse SSH command with error handling + try: + parts = ssh_command.split() + host = parts[1].split('@')[1] + port = parts[3] + username = parts[1].split('@')[0] + except (IndexError, AttributeError) as e: + return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"} + + # Create local directory if it doesn't exist + local_dir = "../../finetune_example/finetuned_model" # This is right + os.makedirs(local_dir, exist_ok=True) + + # Verify remote directory exists + remote_dir = "/home/ubuntu/finetune_example/finetuned_model/" # TODO: this will be wrong + check_result = ssh_manager.execute(f"test -d {remote_dir} && echo 'exists'") + if "exists" not in str(check_result): + return { + "success": False, + "error": f"Remote directory not found: {remote_dir}" + } + + # Sync fine-tuned model back with error handling + result = subprocess.run([ + "rsync", "-avz", + "-e", f"ssh -p {port}", + f"{username}@{host}:{remote_dir}", + local_dir + ], capture_output=True, text=True) + + if result.returncode != 0: + return { + "success": False, + "error": f"Failed to sync from remote: {result.stderr}" + } + + # Verify files were synced correctly + if not os.path.exists(local_dir) or not os.listdir(local_dir): + return { + "success": False, + "error": "Sync completed but no files found in local directory" + } + + return { + "success": True, + "message": "Model files synced successfully from remote" + } + + except Exception as e: + return { + "success": False, + "error": f"Unexpected error during file sync: {str(e)}" + } + +def sync_to_remote() -> Dict[str, bool]: + """Syncs local files to remote GPU for fine-tuning.""" + try: + # Get GPU status and SSH details + gpu_status = get_gpu_status() + if not gpu_status.get("instances"): + return {"success": False, "error": "No active GPU instances found"} + + instance = gpu_status["instances"][0] + ssh_command = instance["sshCommand"] + + # Parse SSH command with error handling + try: + parts = ssh_command.split() + host = parts[1].split('@')[1] + port = parts[3] + username = parts[1].split('@')[0] + except (IndexError, AttributeError) as e: + return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"} + + # Required files to sync + local_files = [ + "./finetune_example/training_data.jsonl", + "./finetune_example/requirements.txt", + "./finetune_example/finetune.py", + ] + + + # Check if all required files exist + for local_file in local_files: + if not os.path.exists(local_file): + return { + "success": False, + "error": f"Required file not found: {local_file}" + } + + + + # Sync each file with error handling + for local_file in local_files: + remote_path = f"/home/ubuntu/{'/'.join(local_file.split('/')[2:])}" + + # Run rsync with output capture + result = subprocess.run([ + "rsync", "-avz", + "-e", f"ssh -p {port}", + local_file, + f"{username}@{host}:{remote_path}" + ], capture_output=True, text=True) + + if result.returncode != 0: + return { + "success": False, + "error": f"Failed to sync {local_file}: {result.stderr}" + } + + # Verify files were synced correctly + for local_file in local_files: + remote_path = f"/home/ubuntu/{'/'.join(local_file.split('/')[2:])}" + check_result = ssh_manager.execute(f"test -f {remote_path} && echo 'exists'") + + if "exists" not in str(check_result): + return { + "success": False, + "error": f"Failed to verify file sync for: {remote_path}" + } + + return { + "success": True, + "message": "All files synced successfully" + } + + except Exception as e: + return { + "success": False, + "error": f"Unexpected error during file sync: {str(e)}" + } \ No newline at end of file From 9866da9086a2c09da7f95ec5b68d8d0ee6806c94 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Wed, 12 Feb 2025 14:22:49 -0500 Subject: [PATCH 02/10] fine tune works and outputs "finetuned_model" --- .../actions/run_remote_finetune.py | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py index 632deb51..620f0001 100644 --- a/hyperbolic_agentkit_core/actions/run_remote_finetune.py +++ b/hyperbolic_agentkit_core/actions/run_remote_finetune.py @@ -69,18 +69,23 @@ def _run( return json.dumps({"status": "error", "message": sync_result.get("error", "Sync failed")}) # Step 4: Execute setup and training commands in a single shell session - combined_command = " && ".join([ - "python3 -m venv venv", - "source venv/bin/activate", - "pip install -r requirements.txt", + combined_command = ( + "cd finetune_example && " + "bash -c '" + "python3 -m venv venv && " + "source venv/bin/activate && " + "pip install -r requirements.txt && " f"FINE_TUNE_MODEL={model_name} python3 finetune.py" - ]) + "'" + ) shell_result = ssh_manager.execute(combined_command) - if isinstance(shell_result, str) and "error" in shell_result.lower(): + # Check for the finetuned_model directory + verify_result = ssh_manager.execute("test -d /home/ubuntu/finetune_example/finetuned_model && echo 'exists'") + if "exists" not in str(verify_result): return json.dumps({ - "status": "error", - "message": f"Command failed: {shell_result}" + "status": "error", + "message": f"Fine-tuning failed or directory not created. Output: {shell_result}" }) return json.dumps({ @@ -255,11 +260,18 @@ def sync_to_remote() -> Dict[str, bool]: "error": f"Required file not found: {local_file}" } + # Create base directory on remote + result = ssh_manager.execute("mkdir -p ~/finetune_example") + if isinstance(result, str) and "error" in result.lower(): + return { + "success": False, + "error": f"Failed to create base directory: {result}" + } # Sync each file with error handling for local_file in local_files: - remote_path = f"/home/ubuntu/{'/'.join(local_file.split('/')[2:])}" + remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" # Run rsync with output capture result = subprocess.run([ @@ -277,7 +289,7 @@ def sync_to_remote() -> Dict[str, bool]: # Verify files were synced correctly for local_file in local_files: - remote_path = f"/home/ubuntu/{'/'.join(local_file.split('/')[2:])}" + remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" check_result = ssh_manager.execute(f"test -f {remote_path} && echo 'exists'") if "exists" not in str(check_result): From 42a9a0690bba542634e8c75fcf1ee87503f2e2d8 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Wed, 12 Feb 2025 14:30:46 -0500 Subject: [PATCH 03/10] add finetune example parameters --- .gitignore | 2 +- finetune_example/finetune.py | 107 +++++++++++++++++++++++++++ finetune_example/requirements.txt | 8 ++ finetune_example/training_data.jsonl | 100 +++++++++++++++++++++++++ 4 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 finetune_example/finetune.py create mode 100644 finetune_example/requirements.txt create mode 100644 finetune_example/training_data.jsonl diff --git a/.gitignore b/.gitignore index 222f9d6f..f4d6ccb2 100644 --- a/.gitignore +++ b/.gitignore @@ -70,5 +70,5 @@ videofiles/ jsonoutputs/ -finetune_example +finetuned_model remote_file_sync.py \ No newline at end of file diff --git a/finetune_example/finetune.py b/finetune_example/finetune.py new file mode 100644 index 00000000..18db7560 --- /dev/null +++ b/finetune_example/finetune.py @@ -0,0 +1,107 @@ +import os +import torch +from unsloth import FastLanguageModel +from transformers import TrainingArguments +from trl import SFTTrainer +from datasets import load_dataset +from datetime import datetime + +def log_progress(msg): + timestamp = datetime.now().strftime("%H:%M:%S") + print(f"[{timestamp}] 🐰 {msg}") + +def format_instruction(example): + """Format the instruction, input, and output into a single text string.""" + # Format: [INST] instruction + input [/INST] output + instruction = example["instruction"] + input_text = example["input"] + output = example["output"] + + if input_text: + prompt = f"{instruction}\n{input_text}" + else: + prompt = instruction + + # Format for Mistral chat template + example["text"] = f"[INST] {prompt} [/INST] {output}" + return example + +def fine_tune(): + log_progress("🚀 Starting fine-tuning process...") + + # Load and format dataset + dataset = load_dataset("json", data_files={"train": "training_data.jsonl"})["train"] + dataset = dataset.map(format_instruction) + + print("Dataset features:", dataset.features) + print("First formatted example:", dataset[0]["text"][:200] + "...") + + # Configure model loading with explicit dtypes + model, tokenizer = FastLanguageModel.from_pretrained( + model_name="unsloth/mistral-7b-v0.3-bnb-4bit", + max_seq_length=2048, + load_in_4bit=True, + quantization_config={ + "load_in_4bit": True, + "bnb_4bit_compute_dtype": torch.bfloat16 + } + ) + + # Add LoRA adapters with explicit dtype + model = FastLanguageModel.get_peft_model( + model, + r=16, + target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], + lora_alpha=16, + lora_dropout=0, + bias="none", + use_gradient_checkpointing=True, + ) + + # Training arguments with mixed precision settings + training_args = TrainingArguments( + output_dir="./finetuned_model", + num_train_epochs=3, + per_device_train_batch_size=2, + gradient_accumulation_steps=4, + learning_rate=2e-4, + logging_steps=1, + optim="adamw_8bit", + bf16=True, # Use bfloat16 precision + torch_compile=False, # Disable torch compile for stability + ) + + # Initialize trainer with the correct dataset field + trainer = SFTTrainer( + model=model, + train_dataset=dataset, + dataset_text_field="text", # Now using the formatted text field + args=training_args, + tokenizer=tokenizer, + max_seq_length=2048, + packing=False, + ) + + # Train + log_progress("🏃 Training model...") + trainer.train() + + # Save the model + log_progress("💾 Saving fine-tuned model...") + # Save model to a specific directory + output_dir = os.path.join(os.getcwd(), "finetuned_model") + os.makedirs(output_dir, exist_ok=True) + trainer.save_model(output_dir) + + # Force sync the saved files + if os.path.exists(output_dir): + # Save a marker file to verify directory contents + with open(os.path.join(output_dir, "model_saved.txt"), "w") as f: + f.write("Model saved successfully") + + log_progress("✨ Fine-tuning complete!") + return {"status": "success", "model_path": output_dir} + +if __name__ == "__main__": + result = fine_tune() + print(f"Model saved to: {result['model_path']}") \ No newline at end of file diff --git a/finetune_example/requirements.txt b/finetune_example/requirements.txt new file mode 100644 index 00000000..7fd1139f --- /dev/null +++ b/finetune_example/requirements.txt @@ -0,0 +1,8 @@ +unsloth +torch>=2.0.0 +transformers>=4.37.0 +accelerate>=0.27.1 +bitsandbytes>=0.41.3 +trl>=0.7.11 +datasets +peft>=0.7.0 \ No newline at end of file diff --git a/finetune_example/training_data.jsonl b/finetune_example/training_data.jsonl new file mode 100644 index 00000000..650a4b5b --- /dev/null +++ b/finetune_example/training_data.jsonl @@ -0,0 +1,100 @@ +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-06-17 00:00:00.000 UTC, blockchain: avalanche_c, usd_volume: 12466440.018553272\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-06-17 00:00:00.000 UTC, blockchain: avalanche_c, usd_volume: 12466440.018553272"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-04-08 00:00:00.000 UTC, blockchain: polygon, usd_volume: 785688892.5893831\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-04-08 00:00:00.000 UTC, blockchain: polygon, usd_volume: 785688892.5893831"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-08-12 00:00:00.000 UTC, blockchain: zora, usd_volume: 864660.1158771071\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-08-12 00:00:00.000 UTC, blockchain: zora, usd_volume: 864660.1158771071"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-06-17 00:00:00.000 UTC, blockchain: zora, usd_volume: 184387.17609555868\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-06-17 00:00:00.000 UTC, blockchain: zora, usd_volume: 184387.17609555868"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-07-29 00:00:00.000 UTC, blockchain: bnb, usd_volume: 281940807.4814643\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-07-29 00:00:00.000 UTC, blockchain: bnb, usd_volume: 281940807.4814643"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-03-04 00:00:00.000 UTC, blockchain: zksync, usd_volume: 221583.92649303746\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-03-04 00:00:00.000 UTC, blockchain: zksync, usd_volume: 221583.92649303746"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-07-22 00:00:00.000 UTC, blockchain: blast, usd_volume: 2614166.6535020834\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-07-22 00:00:00.000 UTC, blockchain: blast, usd_volume: 2614166.6535020834"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-11-18 00:00:00.000 UTC, blockchain: base, usd_volume: 3431475959.9909487\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-11-18 00:00:00.000 UTC, blockchain: base, usd_volume: 3431475959.9909487"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-10-28 00:00:00.000 UTC, blockchain: polygon, usd_volume: 689676631.8130487\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-10-28 00:00:00.000 UTC, blockchain: polygon, usd_volume: 689676631.8130487"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-04 00:00:00.000 UTC, usd: 160020286.43249646\"}","output":"Polymarket daily volume:\n\nday: 2024-11-04 00:00:00.000 UTC, usd: 160020286.43249646"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-05 00:00:00.000 UTC, usd: 238387272.50343198\"}","output":"Polymarket daily volume:\n\nday: 2024-11-05 00:00:00.000 UTC, usd: 238387272.50343198"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-06 00:00:00.000 UTC, usd: 367352111.2733602\"}","output":"Polymarket daily volume:\n\nday: 2024-11-06 00:00:00.000 UTC, usd: 367352111.2733602"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-07 00:00:00.000 UTC, usd: 54086823.63269596\"}","output":"Polymarket daily volume:\n\nday: 2024-11-07 00:00:00.000 UTC, usd: 54086823.63269596"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-08 00:00:00.000 UTC, usd: 61978622.08457193\"}","output":"Polymarket daily volume:\n\nday: 2024-11-08 00:00:00.000 UTC, usd: 61978622.08457193"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-09 00:00:00.000 UTC, usd: 60770440.102616906\"}","output":"Polymarket daily volume:\n\nday: 2024-11-09 00:00:00.000 UTC, usd: 60770440.102616906"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-10 00:00:00.000 UTC, usd: 71852907.82880792\"}","output":"Polymarket daily volume:\n\nday: 2024-11-10 00:00:00.000 UTC, usd: 71852907.82880792"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-11 00:00:00.000 UTC, usd: 75559628.377674\"}","output":"Polymarket daily volume:\n\nday: 2024-11-11 00:00:00.000 UTC, usd: 75559628.377674"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-12 00:00:00.000 UTC, usd: 66244211.641023025\"}","output":"Polymarket daily volume:\n\nday: 2024-11-12 00:00:00.000 UTC, usd: 66244211.641023025"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-13 00:00:00.000 UTC, usd: 81315019.16197997\"}","output":"Polymarket daily volume:\n\nday: 2024-11-13 00:00:00.000 UTC, usd: 81315019.16197997"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-14 00:00:00.000 UTC, usd: 83868143.723078\"}","output":"Polymarket daily volume:\n\nday: 2024-11-14 00:00:00.000 UTC, usd: 83868143.723078"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-15 00:00:00.000 UTC, usd: 85388123.31406502\"}","output":"Polymarket daily volume:\n\nday: 2024-11-15 00:00:00.000 UTC, usd: 85388123.31406502"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-16 00:00:00.000 UTC, usd: 73487029.42315196\"}","output":"Polymarket daily volume:\n\nday: 2024-11-16 00:00:00.000 UTC, usd: 73487029.42315196"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-17 00:00:00.000 UTC, usd: 31952362.05958594\"}","output":"Polymarket daily volume:\n\nday: 2024-11-17 00:00:00.000 UTC, usd: 31952362.05958594"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-18 00:00:00.000 UTC, usd: 56665643.75325994\"}","output":"Polymarket daily volume:\n\nday: 2024-11-18 00:00:00.000 UTC, usd: 56665643.75325994"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-19 00:00:00.000 UTC, usd: 55258055.39631695\"}","output":"Polymarket daily volume:\n\nday: 2024-11-19 00:00:00.000 UTC, usd: 55258055.39631695"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-20 00:00:00.000 UTC, usd: 36109338.67742299\"}","output":"Polymarket daily volume:\n\nday: 2024-11-20 00:00:00.000 UTC, usd: 36109338.67742299"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-21 00:00:00.000 UTC, usd: 57520196.135252096\"}","output":"Polymarket daily volume:\n\nday: 2024-11-21 00:00:00.000 UTC, usd: 57520196.135252096"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-22 00:00:00.000 UTC, usd: 54417225.60496001\"}","output":"Polymarket daily volume:\n\nday: 2024-11-22 00:00:00.000 UTC, usd: 54417225.60496001"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-23 00:00:00.000 UTC, usd: 32802358.232113995\"}","output":"Polymarket daily volume:\n\nday: 2024-11-23 00:00:00.000 UTC, usd: 32802358.232113995"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-24 00:00:00.000 UTC, usd: 38690130.63734394\"}","output":"Polymarket daily volume:\n\nday: 2024-11-24 00:00:00.000 UTC, usd: 38690130.63734394"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-25 00:00:00.000 UTC, usd: 61806613.4861059\"}","output":"Polymarket daily volume:\n\nday: 2024-11-25 00:00:00.000 UTC, usd: 61806613.4861059"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-26 00:00:00.000 UTC, usd: 39154766.67165002\"}","output":"Polymarket daily volume:\n\nday: 2024-11-26 00:00:00.000 UTC, usd: 39154766.67165002"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-27 00:00:00.000 UTC, usd: 41840922.28081795\"}","output":"Polymarket daily volume:\n\nday: 2024-11-27 00:00:00.000 UTC, usd: 41840922.28081795"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-28 00:00:00.000 UTC, usd: 61957870.960981056\"}","output":"Polymarket daily volume:\n\nday: 2024-11-28 00:00:00.000 UTC, usd: 61957870.960981056"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-29 00:00:00.000 UTC, usd: 69246298.91876213\"}","output":"Polymarket daily volume:\n\nday: 2024-11-29 00:00:00.000 UTC, usd: 69246298.91876213"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-30 00:00:00.000 UTC, usd: 38929653.53925096\"}","output":"Polymarket daily volume:\n\nday: 2024-11-30 00:00:00.000 UTC, usd: 38929653.53925096"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-01 00:00:00.000 UTC, usd: 41462935.07671096\"}","output":"Polymarket daily volume:\n\nday: 2024-12-01 00:00:00.000 UTC, usd: 41462935.07671096"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-02 00:00:00.000 UTC, usd: 52679838.17179301\"}","output":"Polymarket daily volume:\n\nday: 2024-12-02 00:00:00.000 UTC, usd: 52679838.17179301"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-03 00:00:00.000 UTC, usd: 58222732.876944065\"}","output":"Polymarket daily volume:\n\nday: 2024-12-03 00:00:00.000 UTC, usd: 58222732.876944065"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-04 00:00:00.000 UTC, usd: 68595811.32860307\"}","output":"Polymarket daily volume:\n\nday: 2024-12-04 00:00:00.000 UTC, usd: 68595811.32860307"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-05 00:00:00.000 UTC, usd: 68194569.35714114\"}","output":"Polymarket daily volume:\n\nday: 2024-12-05 00:00:00.000 UTC, usd: 68194569.35714114"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-06 00:00:00.000 UTC, usd: 74838946.42090401\"}","output":"Polymarket daily volume:\n\nday: 2024-12-06 00:00:00.000 UTC, usd: 74838946.42090401"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-07 00:00:00.000 UTC, usd: 36525235.122943\"}","output":"Polymarket daily volume:\n\nday: 2024-12-07 00:00:00.000 UTC, usd: 36525235.122943"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-08 00:00:00.000 UTC, usd: 36272971.36609699\"}","output":"Polymarket daily volume:\n\nday: 2024-12-08 00:00:00.000 UTC, usd: 36272971.36609699"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-09 00:00:00.000 UTC, usd: 46276016.77061098\"}","output":"Polymarket daily volume:\n\nday: 2024-12-09 00:00:00.000 UTC, usd: 46276016.77061098"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-10 00:00:00.000 UTC, usd: 42688098.76691704\"}","output":"Polymarket daily volume:\n\nday: 2024-12-10 00:00:00.000 UTC, usd: 42688098.76691704"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-11 00:00:00.000 UTC, usd: 56451648.146364026\"}","output":"Polymarket daily volume:\n\nday: 2024-12-11 00:00:00.000 UTC, usd: 56451648.146364026"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-12 00:00:00.000 UTC, usd: 45028009.72842803\"}","output":"Polymarket daily volume:\n\nday: 2024-12-12 00:00:00.000 UTC, usd: 45028009.72842803"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-13 00:00:00.000 UTC, usd: 61838381.18457003\"}","output":"Polymarket daily volume:\n\nday: 2024-12-13 00:00:00.000 UTC, usd: 61838381.18457003"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-14 00:00:00.000 UTC, usd: 30054621.374869026\"}","output":"Polymarket daily volume:\n\nday: 2024-12-14 00:00:00.000 UTC, usd: 30054621.374869026"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-15 00:00:00.000 UTC, usd: 39928301.71727404\"}","output":"Polymarket daily volume:\n\nday: 2024-12-15 00:00:00.000 UTC, usd: 39928301.71727404"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-16 00:00:00.000 UTC, usd: 51107210.48108803\"}","output":"Polymarket daily volume:\n\nday: 2024-12-16 00:00:00.000 UTC, usd: 51107210.48108803"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-17 00:00:00.000 UTC, usd: 57029180.076759115\"}","output":"Polymarket daily volume:\n\nday: 2024-12-17 00:00:00.000 UTC, usd: 57029180.076759115"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-18 00:00:00.000 UTC, usd: 78036428.02560697\"}","output":"Polymarket daily volume:\n\nday: 2024-12-18 00:00:00.000 UTC, usd: 78036428.02560697"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-19 00:00:00.000 UTC, usd: 81742035.96803598\"}","output":"Polymarket daily volume:\n\nday: 2024-12-19 00:00:00.000 UTC, usd: 81742035.96803598"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-20 00:00:00.000 UTC, usd: 73146384.20936199\"}","output":"Polymarket daily volume:\n\nday: 2024-12-20 00:00:00.000 UTC, usd: 73146384.20936199"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-21 00:00:00.000 UTC, usd: 75326597.47640793\"}","output":"Polymarket daily volume:\n\nday: 2024-12-21 00:00:00.000 UTC, usd: 75326597.47640793"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-22 00:00:00.000 UTC, usd: 62733417.462829985\"}","output":"Polymarket daily volume:\n\nday: 2024-12-22 00:00:00.000 UTC, usd: 62733417.462829985"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-23 00:00:00.000 UTC, usd: 81515964.93472399\"}","output":"Polymarket daily volume:\n\nday: 2024-12-23 00:00:00.000 UTC, usd: 81515964.93472399"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-24 00:00:00.000 UTC, usd: 84452697.60240701\"}","output":"Polymarket daily volume:\n\nday: 2024-12-24 00:00:00.000 UTC, usd: 84452697.60240701"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-25 00:00:00.000 UTC, usd: 65611412.90277203\"}","output":"Polymarket daily volume:\n\nday: 2024-12-25 00:00:00.000 UTC, usd: 65611412.90277203"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-26 00:00:00.000 UTC, usd: 75531312.79955502\"}","output":"Polymarket daily volume:\n\nday: 2024-12-26 00:00:00.000 UTC, usd: 75531312.79955502"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-27 00:00:00.000 UTC, usd: 84666482.30961134\"}","output":"Polymarket daily volume:\n\nday: 2024-12-27 00:00:00.000 UTC, usd: 84666482.30961134"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-28 00:00:00.000 UTC, usd: 66314489.88546711\"}","output":"Polymarket daily volume:\n\nday: 2024-12-28 00:00:00.000 UTC, usd: 66314489.88546711"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-29 00:00:00.000 UTC, usd: 79124672.87770194\"}","output":"Polymarket daily volume:\n\nday: 2024-12-29 00:00:00.000 UTC, usd: 79124672.87770194"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-30 00:00:00.000 UTC, usd: 67026283.999560885\"}","output":"Polymarket daily volume:\n\nday: 2024-12-30 00:00:00.000 UTC, usd: 67026283.999560885"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-31 00:00:00.000 UTC, usd: 61504082.1957119\"}","output":"Polymarket daily volume:\n\nday: 2024-12-31 00:00:00.000 UTC, usd: 61504082.1957119"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-01 00:00:00.000 UTC, usd: 50800314.68134794\"}","output":"Polymarket daily volume:\n\nday: 2025-01-01 00:00:00.000 UTC, usd: 50800314.68134794"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-02 00:00:00.000 UTC, usd: 53847325.42475195\"}","output":"Polymarket daily volume:\n\nday: 2025-01-02 00:00:00.000 UTC, usd: 53847325.42475195"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-03 00:00:00.000 UTC, usd: 46429383.92926495\"}","output":"Polymarket daily volume:\n\nday: 2025-01-03 00:00:00.000 UTC, usd: 46429383.92926495"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-04 00:00:00.000 UTC, usd: 32947042.370695993\"}","output":"Polymarket daily volume:\n\nday: 2025-01-04 00:00:00.000 UTC, usd: 32947042.370695993"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-05 00:00:00.000 UTC, usd: 38832085.87390198\"}","output":"Polymarket daily volume:\n\nday: 2025-01-05 00:00:00.000 UTC, usd: 38832085.87390198"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-06 00:00:00.000 UTC, usd: 35272023.70546596\"}","output":"Polymarket daily volume:\n\nday: 2025-01-06 00:00:00.000 UTC, usd: 35272023.70546596"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-07 00:00:00.000 UTC, usd: 35360032.57996298\"}","output":"Polymarket daily volume:\n\nday: 2025-01-07 00:00:00.000 UTC, usd: 35360032.57996298"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-08 00:00:00.000 UTC, usd: 33208108.335891966\"}","output":"Polymarket daily volume:\n\nday: 2025-01-08 00:00:00.000 UTC, usd: 33208108.335891966"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-09 00:00:00.000 UTC, usd: 35644329.186808996\"}","output":"Polymarket daily volume:\n\nday: 2025-01-09 00:00:00.000 UTC, usd: 35644329.186808996"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-10 00:00:00.000 UTC, usd: 40513832.239341974\"}","output":"Polymarket daily volume:\n\nday: 2025-01-10 00:00:00.000 UTC, usd: 40513832.239341974"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-11 00:00:00.000 UTC, usd: 33466705.020306997\"}","output":"Polymarket daily volume:\n\nday: 2025-01-11 00:00:00.000 UTC, usd: 33466705.020306997"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-12 00:00:00.000 UTC, usd: 35015605.96224097\"}","output":"Polymarket daily volume:\n\nday: 2025-01-12 00:00:00.000 UTC, usd: 35015605.96224097"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-13 00:00:00.000 UTC, usd: 36529265.687982015\"}","output":"Polymarket daily volume:\n\nday: 2025-01-13 00:00:00.000 UTC, usd: 36529265.687982015"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-14 00:00:00.000 UTC, usd: 34674910.064938985\"}","output":"Polymarket daily volume:\n\nday: 2025-01-14 00:00:00.000 UTC, usd: 34674910.064938985"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-15 00:00:00.000 UTC, usd: 39174690.48150801\"}","output":"Polymarket daily volume:\n\nday: 2025-01-15 00:00:00.000 UTC, usd: 39174690.48150801"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-16 00:00:00.000 UTC, usd: 35834741.380128995\"}","output":"Polymarket daily volume:\n\nday: 2025-01-16 00:00:00.000 UTC, usd: 35834741.380128995"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-17 00:00:00.000 UTC, usd: 37802164.47748398\"}","output":"Polymarket daily volume:\n\nday: 2025-01-17 00:00:00.000 UTC, usd: 37802164.47748398"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-18 00:00:00.000 UTC, usd: 51490066.41423097\"}","output":"Polymarket daily volume:\n\nday: 2025-01-18 00:00:00.000 UTC, usd: 51490066.41423097"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-19 00:00:00.000 UTC, usd: 71980067.68009694\"}","output":"Polymarket daily volume:\n\nday: 2025-01-19 00:00:00.000 UTC, usd: 71980067.68009694"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-20 00:00:00.000 UTC, usd: 96709324.71025787\"}","output":"Polymarket daily volume:\n\nday: 2025-01-20 00:00:00.000 UTC, usd: 96709324.71025787"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-21 00:00:00.000 UTC, usd: 75100749.98974402\"}","output":"Polymarket daily volume:\n\nday: 2025-01-21 00:00:00.000 UTC, usd: 75100749.98974402"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-22 00:00:00.000 UTC, usd: 30460872.129992988\"}","output":"Polymarket daily volume:\n\nday: 2025-01-22 00:00:00.000 UTC, usd: 30460872.129992988"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-23 00:00:00.000 UTC, usd: 31247928.15104199\"}","output":"Polymarket daily volume:\n\nday: 2025-01-23 00:00:00.000 UTC, usd: 31247928.15104199"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-24 00:00:00.000 UTC, usd: 32406760.850218963\"}","output":"Polymarket daily volume:\n\nday: 2025-01-24 00:00:00.000 UTC, usd: 32406760.850218963"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-25 00:00:00.000 UTC, usd: 26846997.534930017\"}","output":"Polymarket daily volume:\n\nday: 2025-01-25 00:00:00.000 UTC, usd: 26846997.534930017"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-26 00:00:00.000 UTC, usd: 32198952.870107003\"}","output":"Polymarket daily volume:\n\nday: 2025-01-26 00:00:00.000 UTC, usd: 32198952.870107003"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-27 00:00:00.000 UTC, usd: 23854691.439593006\"}","output":"Polymarket daily volume:\n\nday: 2025-01-27 00:00:00.000 UTC, usd: 23854691.439593006"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-28 00:00:00.000 UTC, usd: 20118077.886542007\"}","output":"Polymarket daily volume:\n\nday: 2025-01-28 00:00:00.000 UTC, usd: 20118077.886542007"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-29 00:00:00.000 UTC, usd: 37395442.233462006\"}","output":"Polymarket daily volume:\n\nday: 2025-01-29 00:00:00.000 UTC, usd: 37395442.233462006"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-30 00:00:00.000 UTC, usd: 23321077.25599298\"}","output":"Polymarket daily volume:\n\nday: 2025-01-30 00:00:00.000 UTC, usd: 23321077.25599298"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-31 00:00:00.000 UTC, usd: 32087917.923902016\"}","output":"Polymarket daily volume:\n\nday: 2025-01-31 00:00:00.000 UTC, usd: 32087917.923902016"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-02-01 00:00:00.000 UTC, usd: 3840098.644609003\"}","output":"Polymarket daily volume:\n\nday: 2025-02-01 00:00:00.000 UTC, usd: 3840098.644609003"} +{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-02 00:00:00.000 UTC, usd: 120367344.7677071\"}","output":"Polymarket daily volume:\n\nday: 2024-11-02 00:00:00.000 UTC, usd: 120367344.7677071"} From e1fe47fc61952070f14fe44ed54c4e49ad3b36c3 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Thu, 13 Feb 2025 12:45:25 -0500 Subject: [PATCH 04/10] first test inference works, updated training data format --- .gitignore | 4 +- finetune_example/finetune.py | 114 ++++++++++-------- finetune_example/requirements.txt | 16 ++- finetune_example/test_inference.py | 75 ++++++++++++ finetune_example/training_data.jsonl | 105 +--------------- .../actions/run_remote_finetune.py | 17 ++- 6 files changed, 174 insertions(+), 157 deletions(-) create mode 100644 finetune_example/test_inference.py diff --git a/.gitignore b/.gitignore index f4d6ccb2..41626e2b 100644 --- a/.gitignore +++ b/.gitignore @@ -70,5 +70,7 @@ videofiles/ jsonoutputs/ +# Finetuning finetuned_model -remote_file_sync.py \ No newline at end of file +remote_file_sync.py +finetune_example/training_data2.jsonl \ No newline at end of file diff --git a/finetune_example/finetune.py b/finetune_example/finetune.py index 18db7560..e3055413 100644 --- a/finetune_example/finetune.py +++ b/finetune_example/finetune.py @@ -1,6 +1,6 @@ import os import torch -from unsloth import FastLanguageModel +from unsloth import FastLanguageModel, is_bfloat16_supported from transformers import TrainingArguments from trl import SFTTrainer from datasets import load_dataset @@ -10,20 +10,22 @@ def log_progress(msg): timestamp = datetime.now().strftime("%H:%M:%S") print(f"[{timestamp}] 🐰 {msg}") -def format_instruction(example): - """Format the instruction, input, and output into a single text string.""" - # Format: [INST] instruction + input [/INST] output - instruction = example["instruction"] - input_text = example["input"] - output = example["output"] +def format_chat(example): + """Format the chat messages into a single text string.""" + messages = example["messages"] - if input_text: - prompt = f"{instruction}\n{input_text}" - else: - prompt = instruction - # Format for Mistral chat template - example["text"] = f"[INST] {prompt} [/INST] {output}" + formatted_messages = [] + for msg in messages: + if msg["role"] == "system": + # System message goes at the start + formatted_messages.insert(0, f"[INST] {msg['content']} [/INST]") + elif msg["role"] == "user": + formatted_messages.append(f"[INST] {msg['content']} [/INST]") + elif msg["role"] == "assistant": + formatted_messages.append(f"{msg['content']}") + + example["text"] = " ".join(formatted_messages) return example def fine_tune(): @@ -31,77 +33,95 @@ def fine_tune(): # Load and format dataset dataset = load_dataset("json", data_files={"train": "training_data.jsonl"})["train"] - dataset = dataset.map(format_instruction) - print("Dataset features:", dataset.features) - print("First formatted example:", dataset[0]["text"][:200] + "...") + # Model configuration + max_seq_length = 4096 # Increased for longer context + dtype = None # Auto-detect optimal dtype + load_in_4bit = True # Configure model loading with explicit dtypes model, tokenizer = FastLanguageModel.from_pretrained( model_name="unsloth/mistral-7b-v0.3-bnb-4bit", - max_seq_length=2048, - load_in_4bit=True, - quantization_config={ - "load_in_4bit": True, - "bnb_4bit_compute_dtype": torch.bfloat16 - } + max_seq_length=max_seq_length, + dtype=dtype, + load_in_4bit=load_in_4bit ) - # Add LoRA adapters with explicit dtype + # Set the chat template for Mistral + tokenizer.chat_template = """{% for message in messages %} + {% if message['role'] == 'user' %} + {{ '[INST] ' + message['content'] + ' [/INST]' }} + {% elif message['role'] == 'assistant' %} + {{ message['content'] + '' }} + {% elif message['role'] == 'system' %} + {{ '[INST] ' + message['content'] + ' [/INST]' }} + {% endif %} + {% endfor %}""" + + # Add LoRA adapters with optimized settings model = FastLanguageModel.get_peft_model( model, r=16, - target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], + target_modules=["q_proj", "k_proj", "v_proj", "o_proj", + "gate_proj", "up_proj", "down_proj"], lora_alpha=16, - lora_dropout=0, - bias="none", - use_gradient_checkpointing=True, + lora_dropout=0, # Optimized setting + bias="none", # Optimized setting + use_gradient_checkpointing="unsloth", # Uses 30% less VRAM + random_state=3407, + use_rslora=False, + loftq_config=None ) # Training arguments with mixed precision settings training_args = TrainingArguments( output_dir="./finetuned_model", num_train_epochs=3, - per_device_train_batch_size=2, + per_device_train_batch_size=1, # Reduced for longer sequences gradient_accumulation_steps=4, + warmup_steps=5, learning_rate=2e-4, logging_steps=1, optim="adamw_8bit", - bf16=True, # Use bfloat16 precision + weight_decay=0.01, + lr_scheduler_type="linear", + # Use appropriate precision based on hardware + fp16=not is_bfloat16_supported(), + bf16=is_bfloat16_supported(), torch_compile=False, # Disable torch compile for stability + seed=3407 ) - # Initialize trainer with the correct dataset field + # Initialize trainer trainer = SFTTrainer( model=model, train_dataset=dataset, - dataset_text_field="text", # Now using the formatted text field args=training_args, tokenizer=tokenizer, - max_seq_length=2048, - packing=False, + max_seq_length=max_seq_length, + dataset_num_proc=2, + packing=False ) # Train log_progress("🏃 Training model...") - trainer.train() + trainer_stats = trainer.train() # Save the model log_progress("💾 Saving fine-tuned model...") - # Save model to a specific directory - output_dir = os.path.join(os.getcwd(), "finetuned_model") - os.makedirs(output_dir, exist_ok=True) - trainer.save_model(output_dir) + output_dir = "./finetuned_model" + + # Save model and tokenizer + model.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) - # Force sync the saved files - if os.path.exists(output_dir): - # Save a marker file to verify directory contents - with open(os.path.join(output_dir, "model_saved.txt"), "w") as f: - f.write("Model saved successfully") + # Optionally save in 8-bit GGUF format + try: + model.save_pretrained_gguf(output_dir, tokenizer, quantization_method=["q8_0"]) + except Exception as e: + print(f"Note: GGUF export failed (this is optional): {e}") - log_progress("✨ Fine-tuning complete!") - return {"status": "success", "model_path": output_dir} + return trainer_stats if __name__ == "__main__": - result = fine_tune() - print(f"Model saved to: {result['model_path']}") \ No newline at end of file + result = fine_tune() \ No newline at end of file diff --git a/finetune_example/requirements.txt b/finetune_example/requirements.txt index 7fd1139f..aeb40a4e 100644 --- a/finetune_example/requirements.txt +++ b/finetune_example/requirements.txt @@ -1,8 +1,12 @@ unsloth -torch>=2.0.0 -transformers>=4.37.0 -accelerate>=0.27.1 -bitsandbytes>=0.41.3 -trl>=0.7.11 +xformers +torch --index-url https://download.pytorch.org/whl/cu118 +torchvision --index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate +bitsandbytes +trl datasets -peft>=0.7.0 \ No newline at end of file +peft +ninja +numpy<2.0 \ No newline at end of file diff --git a/finetune_example/test_inference.py b/finetune_example/test_inference.py new file mode 100644 index 00000000..bf1fabd2 --- /dev/null +++ b/finetune_example/test_inference.py @@ -0,0 +1,75 @@ +import json +import os +import sys +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +def run_inference(prompt): + # Ensure CUDA is available and initialized + if not torch.cuda.is_available(): + raise RuntimeError("CUDA is not available") + + # Force CUDA initialization + torch.cuda.init() + + # Print CUDA info for debugging + print(f"CUDA Device: {torch.cuda.get_device_name()}") + print(f"CUDA Version: {torch.version.cuda}") + + # Always use the finetuned model directory for inference + model_dir = os.path.abspath("./finetuned_model") + + try: + # Load base model and tokenizer + model = AutoModelForCausalLM.from_pretrained( + model_dir, + torch_dtype=torch.float16, + device_map="auto" + ) + tokenizer = AutoTokenizer.from_pretrained(model_dir) + + # Prepare input + test_prompt = f"[INST] {prompt} [/INST]" + inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device) + + # Generate + with torch.no_grad(): + outputs = model.generate( + **inputs, + max_new_tokens=512, + temperature=0.7, + top_p=0.95, + do_sample=True + ) + + # Decode output + generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Format output + output = { + "prompt": prompt, + "response": generated_text + } + + # Save and print results + with open("inference_output.json", "w") as f: + json.dump(output, f, indent=2) + + print("\n=== Test Inference Results ===") + print(f"Prompt: {output['prompt']}") + print(f"Response: {output['response']}") + print("============================\n") + + return output + + except Exception as e: + print(f"Error during inference: {str(e)}") + raise + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python test_inference.py \"your prompt here\"") + sys.exit(1) + + prompt = sys.argv[1] + run_inference(prompt) \ No newline at end of file diff --git a/finetune_example/training_data.jsonl b/finetune_example/training_data.jsonl index 650a4b5b..d8027a8b 100644 --- a/finetune_example/training_data.jsonl +++ b/finetune_example/training_data.jsonl @@ -1,100 +1,5 @@ -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-06-17 00:00:00.000 UTC, blockchain: avalanche_c, usd_volume: 12466440.018553272\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-06-17 00:00:00.000 UTC, blockchain: avalanche_c, usd_volume: 12466440.018553272"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-04-08 00:00:00.000 UTC, blockchain: polygon, usd_volume: 785688892.5893831\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-04-08 00:00:00.000 UTC, blockchain: polygon, usd_volume: 785688892.5893831"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-08-12 00:00:00.000 UTC, blockchain: zora, usd_volume: 864660.1158771071\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-08-12 00:00:00.000 UTC, blockchain: zora, usd_volume: 864660.1158771071"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-06-17 00:00:00.000 UTC, blockchain: zora, usd_volume: 184387.17609555868\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-06-17 00:00:00.000 UTC, blockchain: zora, usd_volume: 184387.17609555868"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-07-29 00:00:00.000 UTC, blockchain: bnb, usd_volume: 281940807.4814643\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-07-29 00:00:00.000 UTC, blockchain: bnb, usd_volume: 281940807.4814643"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-03-04 00:00:00.000 UTC, blockchain: zksync, usd_volume: 221583.92649303746\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-03-04 00:00:00.000 UTC, blockchain: zksync, usd_volume: 221583.92649303746"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-07-22 00:00:00.000 UTC, blockchain: blast, usd_volume: 2614166.6535020834\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-07-22 00:00:00.000 UTC, blockchain: blast, usd_volume: 2614166.6535020834"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-11-18 00:00:00.000 UTC, blockchain: base, usd_volume: 3431475959.9909487\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-11-18 00:00:00.000 UTC, blockchain: base, usd_volume: 3431475959.9909487"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Weekly Uniswap volume by chain\",\"data\":\"_col1: 2024-10-28 00:00:00.000 UTC, blockchain: polygon, usd_volume: 689676631.8130487\"}","output":"Weekly Uniswap volume by chain:\n\n_col1: 2024-10-28 00:00:00.000 UTC, blockchain: polygon, usd_volume: 689676631.8130487"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-04 00:00:00.000 UTC, usd: 160020286.43249646\"}","output":"Polymarket daily volume:\n\nday: 2024-11-04 00:00:00.000 UTC, usd: 160020286.43249646"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-05 00:00:00.000 UTC, usd: 238387272.50343198\"}","output":"Polymarket daily volume:\n\nday: 2024-11-05 00:00:00.000 UTC, usd: 238387272.50343198"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-06 00:00:00.000 UTC, usd: 367352111.2733602\"}","output":"Polymarket daily volume:\n\nday: 2024-11-06 00:00:00.000 UTC, usd: 367352111.2733602"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-07 00:00:00.000 UTC, usd: 54086823.63269596\"}","output":"Polymarket daily volume:\n\nday: 2024-11-07 00:00:00.000 UTC, usd: 54086823.63269596"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-08 00:00:00.000 UTC, usd: 61978622.08457193\"}","output":"Polymarket daily volume:\n\nday: 2024-11-08 00:00:00.000 UTC, usd: 61978622.08457193"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-09 00:00:00.000 UTC, usd: 60770440.102616906\"}","output":"Polymarket daily volume:\n\nday: 2024-11-09 00:00:00.000 UTC, usd: 60770440.102616906"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-10 00:00:00.000 UTC, usd: 71852907.82880792\"}","output":"Polymarket daily volume:\n\nday: 2024-11-10 00:00:00.000 UTC, usd: 71852907.82880792"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-11 00:00:00.000 UTC, usd: 75559628.377674\"}","output":"Polymarket daily volume:\n\nday: 2024-11-11 00:00:00.000 UTC, usd: 75559628.377674"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-12 00:00:00.000 UTC, usd: 66244211.641023025\"}","output":"Polymarket daily volume:\n\nday: 2024-11-12 00:00:00.000 UTC, usd: 66244211.641023025"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-13 00:00:00.000 UTC, usd: 81315019.16197997\"}","output":"Polymarket daily volume:\n\nday: 2024-11-13 00:00:00.000 UTC, usd: 81315019.16197997"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-14 00:00:00.000 UTC, usd: 83868143.723078\"}","output":"Polymarket daily volume:\n\nday: 2024-11-14 00:00:00.000 UTC, usd: 83868143.723078"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-15 00:00:00.000 UTC, usd: 85388123.31406502\"}","output":"Polymarket daily volume:\n\nday: 2024-11-15 00:00:00.000 UTC, usd: 85388123.31406502"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-16 00:00:00.000 UTC, usd: 73487029.42315196\"}","output":"Polymarket daily volume:\n\nday: 2024-11-16 00:00:00.000 UTC, usd: 73487029.42315196"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-17 00:00:00.000 UTC, usd: 31952362.05958594\"}","output":"Polymarket daily volume:\n\nday: 2024-11-17 00:00:00.000 UTC, usd: 31952362.05958594"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-18 00:00:00.000 UTC, usd: 56665643.75325994\"}","output":"Polymarket daily volume:\n\nday: 2024-11-18 00:00:00.000 UTC, usd: 56665643.75325994"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-19 00:00:00.000 UTC, usd: 55258055.39631695\"}","output":"Polymarket daily volume:\n\nday: 2024-11-19 00:00:00.000 UTC, usd: 55258055.39631695"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-20 00:00:00.000 UTC, usd: 36109338.67742299\"}","output":"Polymarket daily volume:\n\nday: 2024-11-20 00:00:00.000 UTC, usd: 36109338.67742299"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-21 00:00:00.000 UTC, usd: 57520196.135252096\"}","output":"Polymarket daily volume:\n\nday: 2024-11-21 00:00:00.000 UTC, usd: 57520196.135252096"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-22 00:00:00.000 UTC, usd: 54417225.60496001\"}","output":"Polymarket daily volume:\n\nday: 2024-11-22 00:00:00.000 UTC, usd: 54417225.60496001"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-23 00:00:00.000 UTC, usd: 32802358.232113995\"}","output":"Polymarket daily volume:\n\nday: 2024-11-23 00:00:00.000 UTC, usd: 32802358.232113995"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-24 00:00:00.000 UTC, usd: 38690130.63734394\"}","output":"Polymarket daily volume:\n\nday: 2024-11-24 00:00:00.000 UTC, usd: 38690130.63734394"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-25 00:00:00.000 UTC, usd: 61806613.4861059\"}","output":"Polymarket daily volume:\n\nday: 2024-11-25 00:00:00.000 UTC, usd: 61806613.4861059"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-26 00:00:00.000 UTC, usd: 39154766.67165002\"}","output":"Polymarket daily volume:\n\nday: 2024-11-26 00:00:00.000 UTC, usd: 39154766.67165002"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-27 00:00:00.000 UTC, usd: 41840922.28081795\"}","output":"Polymarket daily volume:\n\nday: 2024-11-27 00:00:00.000 UTC, usd: 41840922.28081795"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-28 00:00:00.000 UTC, usd: 61957870.960981056\"}","output":"Polymarket daily volume:\n\nday: 2024-11-28 00:00:00.000 UTC, usd: 61957870.960981056"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-29 00:00:00.000 UTC, usd: 69246298.91876213\"}","output":"Polymarket daily volume:\n\nday: 2024-11-29 00:00:00.000 UTC, usd: 69246298.91876213"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-30 00:00:00.000 UTC, usd: 38929653.53925096\"}","output":"Polymarket daily volume:\n\nday: 2024-11-30 00:00:00.000 UTC, usd: 38929653.53925096"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-01 00:00:00.000 UTC, usd: 41462935.07671096\"}","output":"Polymarket daily volume:\n\nday: 2024-12-01 00:00:00.000 UTC, usd: 41462935.07671096"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-02 00:00:00.000 UTC, usd: 52679838.17179301\"}","output":"Polymarket daily volume:\n\nday: 2024-12-02 00:00:00.000 UTC, usd: 52679838.17179301"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-03 00:00:00.000 UTC, usd: 58222732.876944065\"}","output":"Polymarket daily volume:\n\nday: 2024-12-03 00:00:00.000 UTC, usd: 58222732.876944065"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-04 00:00:00.000 UTC, usd: 68595811.32860307\"}","output":"Polymarket daily volume:\n\nday: 2024-12-04 00:00:00.000 UTC, usd: 68595811.32860307"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-05 00:00:00.000 UTC, usd: 68194569.35714114\"}","output":"Polymarket daily volume:\n\nday: 2024-12-05 00:00:00.000 UTC, usd: 68194569.35714114"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-06 00:00:00.000 UTC, usd: 74838946.42090401\"}","output":"Polymarket daily volume:\n\nday: 2024-12-06 00:00:00.000 UTC, usd: 74838946.42090401"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-07 00:00:00.000 UTC, usd: 36525235.122943\"}","output":"Polymarket daily volume:\n\nday: 2024-12-07 00:00:00.000 UTC, usd: 36525235.122943"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-08 00:00:00.000 UTC, usd: 36272971.36609699\"}","output":"Polymarket daily volume:\n\nday: 2024-12-08 00:00:00.000 UTC, usd: 36272971.36609699"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-09 00:00:00.000 UTC, usd: 46276016.77061098\"}","output":"Polymarket daily volume:\n\nday: 2024-12-09 00:00:00.000 UTC, usd: 46276016.77061098"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-10 00:00:00.000 UTC, usd: 42688098.76691704\"}","output":"Polymarket daily volume:\n\nday: 2024-12-10 00:00:00.000 UTC, usd: 42688098.76691704"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-11 00:00:00.000 UTC, usd: 56451648.146364026\"}","output":"Polymarket daily volume:\n\nday: 2024-12-11 00:00:00.000 UTC, usd: 56451648.146364026"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-12 00:00:00.000 UTC, usd: 45028009.72842803\"}","output":"Polymarket daily volume:\n\nday: 2024-12-12 00:00:00.000 UTC, usd: 45028009.72842803"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-13 00:00:00.000 UTC, usd: 61838381.18457003\"}","output":"Polymarket daily volume:\n\nday: 2024-12-13 00:00:00.000 UTC, usd: 61838381.18457003"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-14 00:00:00.000 UTC, usd: 30054621.374869026\"}","output":"Polymarket daily volume:\n\nday: 2024-12-14 00:00:00.000 UTC, usd: 30054621.374869026"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-15 00:00:00.000 UTC, usd: 39928301.71727404\"}","output":"Polymarket daily volume:\n\nday: 2024-12-15 00:00:00.000 UTC, usd: 39928301.71727404"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-16 00:00:00.000 UTC, usd: 51107210.48108803\"}","output":"Polymarket daily volume:\n\nday: 2024-12-16 00:00:00.000 UTC, usd: 51107210.48108803"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-17 00:00:00.000 UTC, usd: 57029180.076759115\"}","output":"Polymarket daily volume:\n\nday: 2024-12-17 00:00:00.000 UTC, usd: 57029180.076759115"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-18 00:00:00.000 UTC, usd: 78036428.02560697\"}","output":"Polymarket daily volume:\n\nday: 2024-12-18 00:00:00.000 UTC, usd: 78036428.02560697"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-19 00:00:00.000 UTC, usd: 81742035.96803598\"}","output":"Polymarket daily volume:\n\nday: 2024-12-19 00:00:00.000 UTC, usd: 81742035.96803598"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-20 00:00:00.000 UTC, usd: 73146384.20936199\"}","output":"Polymarket daily volume:\n\nday: 2024-12-20 00:00:00.000 UTC, usd: 73146384.20936199"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-21 00:00:00.000 UTC, usd: 75326597.47640793\"}","output":"Polymarket daily volume:\n\nday: 2024-12-21 00:00:00.000 UTC, usd: 75326597.47640793"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-22 00:00:00.000 UTC, usd: 62733417.462829985\"}","output":"Polymarket daily volume:\n\nday: 2024-12-22 00:00:00.000 UTC, usd: 62733417.462829985"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-23 00:00:00.000 UTC, usd: 81515964.93472399\"}","output":"Polymarket daily volume:\n\nday: 2024-12-23 00:00:00.000 UTC, usd: 81515964.93472399"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-24 00:00:00.000 UTC, usd: 84452697.60240701\"}","output":"Polymarket daily volume:\n\nday: 2024-12-24 00:00:00.000 UTC, usd: 84452697.60240701"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-25 00:00:00.000 UTC, usd: 65611412.90277203\"}","output":"Polymarket daily volume:\n\nday: 2024-12-25 00:00:00.000 UTC, usd: 65611412.90277203"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-26 00:00:00.000 UTC, usd: 75531312.79955502\"}","output":"Polymarket daily volume:\n\nday: 2024-12-26 00:00:00.000 UTC, usd: 75531312.79955502"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-27 00:00:00.000 UTC, usd: 84666482.30961134\"}","output":"Polymarket daily volume:\n\nday: 2024-12-27 00:00:00.000 UTC, usd: 84666482.30961134"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-28 00:00:00.000 UTC, usd: 66314489.88546711\"}","output":"Polymarket daily volume:\n\nday: 2024-12-28 00:00:00.000 UTC, usd: 66314489.88546711"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-29 00:00:00.000 UTC, usd: 79124672.87770194\"}","output":"Polymarket daily volume:\n\nday: 2024-12-29 00:00:00.000 UTC, usd: 79124672.87770194"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-30 00:00:00.000 UTC, usd: 67026283.999560885\"}","output":"Polymarket daily volume:\n\nday: 2024-12-30 00:00:00.000 UTC, usd: 67026283.999560885"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-12-31 00:00:00.000 UTC, usd: 61504082.1957119\"}","output":"Polymarket daily volume:\n\nday: 2024-12-31 00:00:00.000 UTC, usd: 61504082.1957119"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-01 00:00:00.000 UTC, usd: 50800314.68134794\"}","output":"Polymarket daily volume:\n\nday: 2025-01-01 00:00:00.000 UTC, usd: 50800314.68134794"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-02 00:00:00.000 UTC, usd: 53847325.42475195\"}","output":"Polymarket daily volume:\n\nday: 2025-01-02 00:00:00.000 UTC, usd: 53847325.42475195"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-03 00:00:00.000 UTC, usd: 46429383.92926495\"}","output":"Polymarket daily volume:\n\nday: 2025-01-03 00:00:00.000 UTC, usd: 46429383.92926495"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-04 00:00:00.000 UTC, usd: 32947042.370695993\"}","output":"Polymarket daily volume:\n\nday: 2025-01-04 00:00:00.000 UTC, usd: 32947042.370695993"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-05 00:00:00.000 UTC, usd: 38832085.87390198\"}","output":"Polymarket daily volume:\n\nday: 2025-01-05 00:00:00.000 UTC, usd: 38832085.87390198"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-06 00:00:00.000 UTC, usd: 35272023.70546596\"}","output":"Polymarket daily volume:\n\nday: 2025-01-06 00:00:00.000 UTC, usd: 35272023.70546596"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-07 00:00:00.000 UTC, usd: 35360032.57996298\"}","output":"Polymarket daily volume:\n\nday: 2025-01-07 00:00:00.000 UTC, usd: 35360032.57996298"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-08 00:00:00.000 UTC, usd: 33208108.335891966\"}","output":"Polymarket daily volume:\n\nday: 2025-01-08 00:00:00.000 UTC, usd: 33208108.335891966"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-09 00:00:00.000 UTC, usd: 35644329.186808996\"}","output":"Polymarket daily volume:\n\nday: 2025-01-09 00:00:00.000 UTC, usd: 35644329.186808996"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-10 00:00:00.000 UTC, usd: 40513832.239341974\"}","output":"Polymarket daily volume:\n\nday: 2025-01-10 00:00:00.000 UTC, usd: 40513832.239341974"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-11 00:00:00.000 UTC, usd: 33466705.020306997\"}","output":"Polymarket daily volume:\n\nday: 2025-01-11 00:00:00.000 UTC, usd: 33466705.020306997"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-12 00:00:00.000 UTC, usd: 35015605.96224097\"}","output":"Polymarket daily volume:\n\nday: 2025-01-12 00:00:00.000 UTC, usd: 35015605.96224097"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-13 00:00:00.000 UTC, usd: 36529265.687982015\"}","output":"Polymarket daily volume:\n\nday: 2025-01-13 00:00:00.000 UTC, usd: 36529265.687982015"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-14 00:00:00.000 UTC, usd: 34674910.064938985\"}","output":"Polymarket daily volume:\n\nday: 2025-01-14 00:00:00.000 UTC, usd: 34674910.064938985"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-15 00:00:00.000 UTC, usd: 39174690.48150801\"}","output":"Polymarket daily volume:\n\nday: 2025-01-15 00:00:00.000 UTC, usd: 39174690.48150801"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-16 00:00:00.000 UTC, usd: 35834741.380128995\"}","output":"Polymarket daily volume:\n\nday: 2025-01-16 00:00:00.000 UTC, usd: 35834741.380128995"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-17 00:00:00.000 UTC, usd: 37802164.47748398\"}","output":"Polymarket daily volume:\n\nday: 2025-01-17 00:00:00.000 UTC, usd: 37802164.47748398"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-18 00:00:00.000 UTC, usd: 51490066.41423097\"}","output":"Polymarket daily volume:\n\nday: 2025-01-18 00:00:00.000 UTC, usd: 51490066.41423097"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-19 00:00:00.000 UTC, usd: 71980067.68009694\"}","output":"Polymarket daily volume:\n\nday: 2025-01-19 00:00:00.000 UTC, usd: 71980067.68009694"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-20 00:00:00.000 UTC, usd: 96709324.71025787\"}","output":"Polymarket daily volume:\n\nday: 2025-01-20 00:00:00.000 UTC, usd: 96709324.71025787"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-21 00:00:00.000 UTC, usd: 75100749.98974402\"}","output":"Polymarket daily volume:\n\nday: 2025-01-21 00:00:00.000 UTC, usd: 75100749.98974402"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-22 00:00:00.000 UTC, usd: 30460872.129992988\"}","output":"Polymarket daily volume:\n\nday: 2025-01-22 00:00:00.000 UTC, usd: 30460872.129992988"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-23 00:00:00.000 UTC, usd: 31247928.15104199\"}","output":"Polymarket daily volume:\n\nday: 2025-01-23 00:00:00.000 UTC, usd: 31247928.15104199"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-24 00:00:00.000 UTC, usd: 32406760.850218963\"}","output":"Polymarket daily volume:\n\nday: 2025-01-24 00:00:00.000 UTC, usd: 32406760.850218963"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-25 00:00:00.000 UTC, usd: 26846997.534930017\"}","output":"Polymarket daily volume:\n\nday: 2025-01-25 00:00:00.000 UTC, usd: 26846997.534930017"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-26 00:00:00.000 UTC, usd: 32198952.870107003\"}","output":"Polymarket daily volume:\n\nday: 2025-01-26 00:00:00.000 UTC, usd: 32198952.870107003"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-27 00:00:00.000 UTC, usd: 23854691.439593006\"}","output":"Polymarket daily volume:\n\nday: 2025-01-27 00:00:00.000 UTC, usd: 23854691.439593006"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-28 00:00:00.000 UTC, usd: 20118077.886542007\"}","output":"Polymarket daily volume:\n\nday: 2025-01-28 00:00:00.000 UTC, usd: 20118077.886542007"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-29 00:00:00.000 UTC, usd: 37395442.233462006\"}","output":"Polymarket daily volume:\n\nday: 2025-01-29 00:00:00.000 UTC, usd: 37395442.233462006"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-30 00:00:00.000 UTC, usd: 23321077.25599298\"}","output":"Polymarket daily volume:\n\nday: 2025-01-30 00:00:00.000 UTC, usd: 23321077.25599298"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-01-31 00:00:00.000 UTC, usd: 32087917.923902016\"}","output":"Polymarket daily volume:\n\nday: 2025-01-31 00:00:00.000 UTC, usd: 32087917.923902016"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2025-02-01 00:00:00.000 UTC, usd: 3840098.644609003\"}","output":"Polymarket daily volume:\n\nday: 2025-02-01 00:00:00.000 UTC, usd: 3840098.644609003"} -{"instruction":"Analyze this chart data and provide insights:","input":"{\"chartTitle\":\"Polymarket daily volume\",\"data\":\"day: 2024-11-02 00:00:00.000 UTC, usd: 120367344.7677071\"}","output":"Polymarket daily volume:\n\nday: 2024-11-02 00:00:00.000 UTC, usd: 120367344.7677071"} +{"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE trip (\n\tid INTEGER, \n\tduration INTEGER, \n\tstart_date TEXT, \n\tstart_station_name TEXT, \n\tstart_station_id INTEGER, \n\tend_date TEXT, \n\tend_station_name TEXT, \n\tend_station_id INTEGER, \n\tbike_id INTEGER, \n\tsubscription_type TEXT, \n\tzip_code INTEGER, \n\tPRIMARY KEY (id)\n)\n/*\nColumns in trip and 3 examples in each column for high cardinality columns :\nid : 900645, 900752, 900524\nduration : 1131, 2146, 1155\nstart_date : 8/21/2015 17:39, 8/21/2015 17:03, 8/21/2015 17:16\nstart_station_name : Howard at 2nd, 2nd at Folsom, Market at 10th\nstart_station_id : 56, 65, 49\nend_date : 8/21/2015 17:19, 8/21/2015 18:08, 8/21/2015 17:32\nend_station_name : Howard at 2nd, 2nd at Folsom, Market at 10th\nend_station_id : 56, 65, 49\nbike_id : 586, 56, 65\nzip_code : 94070, 94530, 94040-1724\n*/\n/*\nColumns in trip and all categories for low cardinality columns :\nsubscription_type : Customer, Subscriber\n*/\n \nCREATE TABLE \"Problems\" (\n\tproblem_id INTEGER, \n\tproduct_id INTEGER NOT NULL, \n\tclosure_authorised_by_staff_id INTEGER NOT NULL, \n\treported_by_staff_id INTEGER NOT NULL, \n\tdate_problem_reported DATETIME NOT NULL, \n\tdate_problem_closed DATETIME, \n\tproblem_description VARCHAR(255), \n\tother_problem_details VARCHAR(255), \n\tPRIMARY KEY (problem_id), \n\tFOREIGN KEY(reported_by_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(product_id) REFERENCES \"Product\" (product_id), \n\tFOREIGN KEY(closure_authorised_by_staff_id) REFERENCES \"Staff\" (staff_id)\n)\n/*\nColumns in Problems and 3 examples in each column for high cardinality columns :\nproblem_id : 1, 13, 11\nclosure_authorised_by_staff_id : 1, 13, 2\ndate_problem_reported : 1995-05-14 08:32:56, 1988-11-07 16:09:31, 1986-11-13 07:30:55\ndate_problem_closed : 1974-09-20 13:42:19, 1997-10-18 20:09:57, 2004-06-20 01:08:25\nproblem_description : d, i, s\n*/\n/*\nColumns in Problems and all categories for low cardinality columns :\nproduct_id : 1, 13, 2, 5, 7, 8, 4, 6, 15\nreported_by_staff_id : 1, 13, 11, 2, 5, 7, 4, 14, 10\nother_problem_details : f, m, i, s, k, l, p, v, c\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE category (\n\tcategory_id INTEGER NOT NULL, \n\tname VARCHAR(25) NOT NULL, \n\tlast_update TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, \n\tPRIMARY KEY (category_id)\n)\n/*\nColumns in category and 3 examples in each column for high cardinality columns :\ncategory_id : 1, 16, 13\nname : Family, Sci-Fi, Action\n*/\n/*\nColumns in category and all categories for low cardinality columns :\nlast_update : 2006-02-15 04:46:27\n*/\n \nCREATE TABLE ship (\n\t\"Ship_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Type\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Tonnage\" INTEGER, \n\tPRIMARY KEY (\"Ship_ID\")\n)\n/*\nColumns in ship and all categories for low cardinality columns :\nShip_ID : 1, 2, 5, 7, 8, 4, 6, 3\nName : Clan McTavish, Farringford, Appam, Author, Dromonby, Corbridge, Trader, Ariadne\nType : Battle ship, Cargo ship\nNationality : United States, United Kingdom\nTonnage : 3035, 3146, 7781, 3496, 3687, 5816, 3627, 3608\n*/\n \nCREATE TABLE member_attendance (\n\t\"Member_ID\" INTEGER, \n\t\"Performance_ID\" INTEGER, \n\t\"Num_of_Pieces\" INTEGER, \n\tPRIMARY KEY (\"Member_ID\", \"Performance_ID\"), \n\tFOREIGN KEY(\"Performance_ID\") REFERENCES performance (\"Performance_ID\"), \n\tFOREIGN KEY(\"Member_ID\") REFERENCES member (\"Member_ID\")\n)\n/*\nColumns in member_attendance and all categories for low cardinality columns :\nMember_ID : 1, 11, 2, 5, 7, 4, 3\nPerformance_ID : 1, 2, 4, 6, 3\nNum_of_Pieces : 1, 2, 4, 3\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE chip_model (\n\t\"Model_name\" TEXT, \n\t\"Launch_year\" REAL, \n\t\"RAM_MiB\" REAL, \n\t\"ROM_MiB\" REAL, \n\t\"Slots\" TEXT, \n\t\"WiFi\" TEXT, \n\t\"Bluetooth\" TEXT, \n\tPRIMARY KEY (\"Model_name\")\n)\n/*\nColumns in chip_model and 3 examples in each column for high cardinality columns :\nModel_name : X30 mid-range, X50 Advanced, X51 mid-range\n*/\n/*\nColumns in chip_model and all categories for low cardinality columns :\nLaunch_year : 2002.0, 2005.0, 2004.0, 2003.0\nRAM_MiB : 32.0, 64.0\nROM_MiB : 48.0, 256.0, 128.0, 32.0, 64.0\nSlots : 1CFII,1SD, 1SD\nWiFi : 802.11b, No\nBluetooth : 1.2, Yes, No, 1.1\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE mountain (\n\t\"Mountain_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Height\" REAL, \n\t\"Prominence\" REAL, \n\t\"Range\" TEXT, \n\t\"Country\" TEXT, \n\tPRIMARY KEY (\"Mountain_ID\")\n)\n/*\nColumns in mountain and all categories for low cardinality columns :\nMountain_ID : 1, 2, 5, 7, 4, 6, 3\nName : Ngaliema / Mt Stanley (Margherita Pk), Mount Kenya (Lenana), Kibo (Uhuru Pk), Ngaliema / Mt Stanley (Savoia Pk), Mount Kenya (Batian), Duwoni / Mt Speke (Vittorio Emanuele Pk), Mawenzi (Hans Meyer Pk)\nHeight : 5109.0, 5199.0, 5895.0, 4890.0, 4985.0, 4977.0, 5148.0\nProminence : 720.0, 850.0, 3951.0, 3825.0, 130.0, 5885.0, 110.0\nRange : Kilimanjaro, Mount Kenya, Rwenzori\nCountry : DR Congo Uganda, Uganda, Tanzania, Kenya\n*/\n \nCREATE TABLE \"Restaurant_Type\" (\n\t\"ResTypeID\" INTEGER, \n\t\"ResTypeName\" VARCHAR(40), \n\t\"ResTypeDescription\" VARCHAR(100), \n\tPRIMARY KEY (\"ResTypeID\")\n)\n/*\nColumns in Restaurant_Type and all categories for low cardinality columns :\nResTypeID : 1, 2\nResTypeName : Sandwich, Stir-fry\nResTypeDescription : Classic Chinese cooking., Simplest there is.\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE \"Country\" (\n\tid INTEGER, \n\tname TEXT, \n\tPRIMARY KEY (id)\n)\n/*\nColumns in Country and 3 examples in each column for high cardinality columns :\nid : 1, 19694, 7809\nname : Scotland, Italy, Spain\n*/\n\n \nCREATE TABLE artist (\n\tartist_name TEXT(50) NOT NULL, \n\tcountry TEXT(20), \n\tgender TEXT(20), \n\tpreferred_genre TEXT(50), \n\tCONSTRAINT a_name PRIMARY KEY (artist_name), \n\tFOREIGN KEY(preferred_genre) REFERENCES genre (g_name) ON DELETE CASCADE\n)\n/*\nColumns in artist and all categories for low cardinality columns :\nartist_name : Prity, Michel, Topu, Shrikanta, Enrique, Farida\ncountry : India, UK, USA, Bangladesh\ngender : Male, Female\npreferred_genre : tagore, folk, modern, nazrul, blues, pop\n*/\n \nCREATE TABLE \"Organizations\" (\n\torganization_id INTEGER NOT NULL, \n\tparent_organization_id INTEGER, \n\torganization_details VARCHAR(255), \n\tPRIMARY KEY (organization_id)\n)\n/*\nColumns in Organizations and all categories for low cardinality columns :\norganization_id : 7, 8, 10\nparent_organization_id : 7, 8\norganization_details : Denesik and Sons Party, Reinger, Hudson and Nolan Group, Robel-Schulist Group\n*/\n \nCREATE TABLE school (\n\t\"School_ID\" INTEGER, \n\t\"School\" TEXT, \n\t\"Location\" TEXT, \n\t\"Enrollment\" REAL, \n\t\"Founded\" REAL, \n\t\"Denomination\" TEXT, \n\t\"Boys_or_Girls\" TEXT, \n\t\"Day_or_Boarding\" TEXT, \n\t\"Year_Entered_Competition\" REAL, \n\t\"School_Colors\" TEXT, \n\tPRIMARY KEY (\"School_ID\")\n)\n/*\nColumns in school and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nSchool : St Aloysius' College, Cranbrook School, Waverley College, Knox Grammar School, Barker College, Trinity Grammar School\nLocation : Hornsby, Summer Hill, Waverley, Bellevue Hill, Milsons Point, Wahroonga\nEnrollment : 1000.0, 1850.0, 2200.0, 1200.0, 2300.0, 1430.0\nFounded : 1918.0, 1924.0, 1913.0, 1879.0, 1903.0, 1890.0\nDenomination : Catholic, Uniting Church, Anglican\nBoys_or_Girls : Boys only to Yr 9 Co-ed Year 10 to 12, Boys\nDay_or_Boarding : Day, Day & Boarding\nYear_Entered_Competition : 1944.0, 1929.0\nSchool_Colors : Royal Blue and Gold, Black & Blue, Red, White & Blue, Red & Blue, Green and White\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE TABLE journalist (\n\t\"journalist_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Age\" TEXT, \n\t\"Years_working\" INTEGER, \n\tPRIMARY KEY (\"journalist_ID\")\n)\n/*\nColumns in journalist and 3 examples in each column for high cardinality columns :\njournalist_ID : 1, 11, 2\nName : Jack Meaney, Herbert Swindells, Jackie Waring\n*/\n/*\nColumns in journalist and all categories for low cardinality columns :\nNationality : Northern Ireland, Wales, England\nAge : 37, 28, 25, 33, 34, 43, 27, 29\nYears_working : 1, 5, 7, 8, 21, 6, 3, 12, 10, 9\n*/\n \nCREATE TABLE list (\n\t\"LastName\" TEXT, \n\t\"FirstName\" TEXT, \n\t\"Grade\" INTEGER, \n\t\"Classroom\" INTEGER, \n\tPRIMARY KEY (\"LastName\", \"FirstName\")\n)\n/*\nColumns in list and 3 examples in each column for high cardinality columns :\nLastName : HOUTCHENS, GELL, FLACHS\nFirstName : RAY, EMILE, PATRINA\nClassroom : 109, 110, 106\n*/\n/*\nColumns in list and all categories for low cardinality columns :\nGrade : 1, 2, 5, 4, 6, 3, 0\n*/\n"}, {"role": "user", "content": "How many heads of the departments are older than 56 ?"}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: age.\nThe SQL query I'll be generating is:\nSELECT count(*) FROM head WHERE age > 56"}]} +{"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE \"Documents_with_Expenses\" (\n\t\"Document_ID\" INTEGER NOT NULL, \n\t\"Budget_Type_Code\" CHAR(15) NOT NULL, \n\t\"Document_Details\" VARCHAR(255), \n\tPRIMARY KEY (\"Document_ID\"), \n\tFOREIGN KEY(\"Budget_Type_Code\") REFERENCES \"Ref_Budget_Codes\" (\"Budget_Type_Code\"), \n\tFOREIGN KEY(\"Document_ID\") REFERENCES \"Documents\" (\"Document_ID\")\n)\n/*\nColumns in Documents_with_Expenses and all categories for low cardinality columns :\nDocument_ID : 300, 240, 309, 226, 367, 371, 383, 227, 192, 57\nBudget_Type_Code : GV, SF, ORG\nDocument_Details : government, safety, organization\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_address_id INTEGER NOT NULL, \n\tcustomer_status_code VARCHAR(15) NOT NULL, \n\tdate_became_customer DATETIME, \n\tdate_of_birth DATETIME, \n\tfirst_name VARCHAR(80), \n\tlast_name VARCHAR(80), \n\tamount_outstanding DOUBLE, \n\temail_address VARCHAR(250), \n\tphone_number VARCHAR(255), \n\tcell_mobile_phone_number VARCHAR(255), \n\tPRIMARY KEY (customer_id), \n\tFOREIGN KEY(customer_address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ndate_became_customer : 2016-03-18 06:14:18, 2016-05-13 07:38:23, 2017-10-05 14:15:46\ndate_of_birth : 1994-05-07 01:32:16, 1991-12-05 02:50:15, 1998-09-18 04:45:01\nfirst_name : Millie, Dianna, Carole\nlast_name : Schmeler, Tromp, Goodwin\namount_outstanding : 7093.0, 8144.0, 9447.0\nemail_address : mariela28@example.org, wehner.harold@example.com, afranecki@example.com\nphone_number : 1-618-535-9750, 218.550.1362, 753.921.0871\ncell_mobile_phone_number : 578-518-4785x612, (780)719-4206x033, (630)964-2426\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\ncustomer_address_id : 1, 13, 2, 7, 8, 12, 10, 9\ncustomer_status_code : Bad Customer, Good Customer\n*/\n \nCREATE TABLE \"Instruments\" (\n\t\"SongId\" INTEGER, \n\t\"BandmateId\" INTEGER, \n\t\"Instrument\" TEXT, \n\tPRIMARY KEY (\"SongId\", \"BandmateId\", \"Instrument\"), \n\tFOREIGN KEY(\"SongId\") REFERENCES \"Songs\" (\"SongId\"), \n\tFOREIGN KEY(\"BandmateId\") REFERENCES \"Band\" (\"Id\")\n)\n/*\nColumns in Instruments and all categories for low cardinality columns :\nSongId : \nBandmateId : \nInstrument : \n*/\n \nCREATE TABLE \"Apartments\" (\n\tapt_id INTEGER NOT NULL, \n\tbuilding_id INTEGER NOT NULL, \n\tapt_type_code CHAR(15), \n\tapt_number CHAR(10), \n\tbathroom_count INTEGER, \n\tbedroom_count INTEGER, \n\troom_count CHAR(5), \n\tPRIMARY KEY (apt_id), \n\tFOREIGN KEY(building_id) REFERENCES \"Apartment_Buildings\" (building_id), \n\tUNIQUE (apt_id)\n)\n/*\nColumns in Apartments and 3 examples in each column for high cardinality columns :\napt_id : 1, 13, 11\napt_number : Suite 307, Apt. 411, Apt. 585\n*/\n/*\nColumns in Apartments and all categories for low cardinality columns :\nbuilding_id : 153, 673, 808, 744, 624, 790, 191, 734, 225\napt_type_code : Studio, Flat, Duplex\nbathroom_count : 1, 2, 3\nbedroom_count : 5, 4, 6, 3\nroom_count : 5, 7, 8, 6, 3, 9\n*/\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_first_name VARCHAR(20), \n\tcustomer_last_name VARCHAR(20), \n\tcustomer_address VARCHAR(255), \n\tcustomer_phone VARCHAR(255), \n\tcustomer_email VARCHAR(255), \n\tother_customer_details VARCHAR(255), \n\tPRIMARY KEY (customer_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ncustomer_first_name : Armani, Bryce, Nat\ncustomer_last_name : Swift, Lind, Huels\ncustomer_address : 6478 Moen Isle Suite 910\nSchimmelmouth, VT 96364-4898, 47286 Mraz Park Apt. 424\nJocelynfurt, OH 59023-2787, 094 Julianne Mill\nWebstertown, KY 91980-4004\ncustomer_phone : (415)237-0701x3115, 679-845-8645x94312, 284.749.0453\ncustomer_email : kyra.murazik@example.org, schaden.katrina@example.net, huels.antonina@example.com\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\nother_customer_details : None\n*/\n \nCREATE TABLE \"Guests\" (\n\tguest_id INTEGER NOT NULL, \n\tgender_code CHAR(1), \n\tguest_first_name VARCHAR(80), \n\tguest_last_name VARCHAR(80), \n\tdate_of_birth DATETIME, \n\tPRIMARY KEY (guest_id), \n\tUNIQUE (guest_id)\n)\n/*\nColumns in Guests and 3 examples in each column for high cardinality columns :\nguest_id : 1, 13, 11\nguest_first_name : Louvenia, Lou, Keith\nguest_last_name : Bode, Mante, Schowalter\ndate_of_birth : 2001-06-18 20:05:55, 1980-06-20 18:15:39, 1997-01-15 17:37:40\n*/\n/*\nColumns in Guests and all categories for low cardinality columns :\ngender_code : Male, Unknown, Female\n*/\n \nCREATE TABLE school_details (\n\t\"School_ID\" INTEGER, \n\t\"Nickname\" TEXT, \n\t\"Colors\" TEXT, \n\t\"League\" TEXT, \n\t\"Class\" TEXT, \n\t\"Division\" TEXT, \n\tPRIMARY KEY (\"School_ID\"), \n\tFOREIGN KEY(\"School_ID\") REFERENCES school (\"School_ID\")\n)\n/*\nColumns in school_details and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nNickname : Auks, Raiders, Silver Eagles, Seahawks, Buccaneers, Tigers\nColors : Dark Green and White, Blue and Yellow, Maroon and White, Blue and Gold, Garnet and White, Silver and Maroon\nLeague : DSHA\nClass : Flight B, Flight A, Fight A\nDivision : Division 1, Division 2, Division 3\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Sex\" TEXT, \n\t\"Name\" TEXT, \n\t\"Date_of_Birth\" TEXT, \n\t\"Height\" REAL, \n\t\"Weight\" REAL, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 8, 4, 6, 3, 9\nSex : F, M\nName : Vincent Montméat, Frantz Granvorka, Loïc De Kergret, Philippe Barça-Cysique, Guillaume Samica, Stéphane Antiga, Dominique Daquin, Laurent Capet, Hubert Henno\nDate_of_Birth : 20.08.1970, 10.03.1976, 05.05.1972, 28.09.1981, 22.04.1977, 01.09.1977, 10.11.1972, 06.10.1976, 03.02.1976\nHeight : 196.0, 195.0, 197.0, 188.0, 193.0, 200.0, 202.0, 194.0\nWeight : 89.0, 82.0, 85.0, 88.0, 90.0, 92.0, 83.0, 94.0\n*/\n \nCREATE TABLE protein (\n\tcommon_name TEXT, \n\tprotein_name TEXT, \n\tdivergence_from_human_lineage REAL, \n\taccession_number TEXT, \n\tsequence_length REAL, \n\tsequence_identity_to_human_protein TEXT, \n\t\"Institution_id\" TEXT, \n\tPRIMARY KEY (common_name), \n\tFOREIGN KEY(\"Institution_id\") REFERENCES \"Institution\" (\"Institution_id\")\n)\n/*\nColumns in protein and all categories for low cardinality columns :\ncommon_name : Tropical Clawed Frog, porkworm, body louse, southern house mosquito, purple sea urchin\nprotein_name : uncharacterized protein LOC578090, conserved hypothetical protein, Centromeric protein E, putative, surface antigen repeat family protein, uncharacterized protein C20orf117-like\ndivergence_from_human_lineage : 937.5, 742.9, 782.7, 371.2\naccession_number : XP_003380263.1, XP_002942331.1, XP_783370.2, XP_002429877.1, XP_001843754.1\nsequence_length : 2030.0, 1587.0, 2086.0, 1584.0, 1878.0\nsequence_identity_to_human_protein : 30%, 47%, 36%, 39%, 32%\nInstitution_id : 1, 5, 9, 3\n*/\n \nCREATE TABLE \"Products\" (\n\tproduct_id INTEGER, \n\tcolor_code VARCHAR(15) NOT NULL, \n\tproduct_category_code VARCHAR(15) NOT NULL, \n\tproduct_name VARCHAR(80), \n\ttypical_buying_price VARCHAR(20), \n\ttypical_selling_price VARCHAR(20), \n\tproduct_description VARCHAR(255), \n\tother_product_details VARCHAR(255), \n\tPRIMARY KEY (product_id), \n\tFOREIGN KEY(color_code) REFERENCES \"Ref_Colors\" (color_code), \n\tFOREIGN KEY(product_category_code) REFERENCES \"Ref_Product_Categories\" (product_category_code)\n)\n/*\nColumns in Products and 3 examples in each column for high cardinality columns :\nproduct_id : 1, 13, 11\nproduct_name : peper, cumin, chili\ntypical_buying_price : , 5.578, 503.8431967\ntypical_selling_price : , 149, 10377614.847385\nproduct_description : ut, et, vel\n*/\n/*\nColumns in Products and all categories for low cardinality columns :\ncolor_code : 1, 2, 5, 7, 4, 9\nproduct_category_code : Spices, Seeds, Herbs\nother_product_details : None\n*/\n \nCREATE TABLE film_text (\n\tfilm_id SMALLINT NOT NULL, \n\ttitle VARCHAR(255) NOT NULL, \n\tdescription TEXT, \n\tPRIMARY KEY (film_id)\n)\n/*\nColumns in film_text and all categories for low cardinality columns :\nfilm_id : \ntitle : \ndescription : \n*/\n \nCREATE TABLE \"Student\" (\n\t\"StuID\" INTEGER, \n\t\"LName\" VARCHAR(12), \n\t\"Fname\" VARCHAR(12), \n\t\"Age\" INTEGER, \n\t\"Sex\" VARCHAR(1), \n\t\"Major\" INTEGER, \n\t\"Advisor\" INTEGER, \n\tcity_code VARCHAR(3), \n\tPRIMARY KEY (\"StuID\")\n)\n/*\nColumns in Student and 3 examples in each column for high cardinality columns :\nStuID : 1003, 1017, 1002\nLName : Lee, Han, Brody\nFname : Tracy, Michael, Shiela\nAdvisor : 7723, 7712, 7134\ncity_code : ATL, HOU, LON\n*/\n/*\nColumns in Student and all categories for low cardinality columns :\nAge : 26, 16, 20, 17, 21, 19, 18, 27, 22\nSex : F, M\nMajor : 520, 100, 50, 600, 540, 550\n*/\n \nCREATE TABLE pilot (\n\t\"Pilot_ID\" INTEGER, \n\t\"Pilot_name\" TEXT, \n\t\"Rank\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Nationality\" TEXT, \n\t\"Position\" TEXT, \n\t\"Join_Year\" INTEGER, \n\t\"Team\" TEXT, \n\tPRIMARY KEY (\"Pilot_ID\")\n)\n/*\nColumns in pilot and all categories for low cardinality columns :\nPilot_ID : 1, 2, 5, 4, 3\nPilot_name : Patrick O'Bryant, Charles Oakley, Dan O'Sullivan, Hakeem Olajuwon, Jermaine O'Neal\nRank : 34, 13, 45, 6\nAge : 37, 33, 40, 32, 22\nNationality : United States, Nigeria, United Kindom\nPosition : Center Team, Forward Team, Forward-Center Team\nJoin_Year : 2009, 2001, 2008, 1999, 2010\nTeam : Fordham, Eau Claire High School, Bradley, Virginia Union, Houston\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n"}, {"role": "user", "content": "List the name, born state and age of the heads of departments ordered by age."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: name, born_state, age.\nThe SQL query I'll be generating is:\nSELECT name , born_state , age FROM head ORDER BY age"}]} +{"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Graduation_College\" TEXT, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 4, 6, 3\nAge : 37, 25, 30, 34, 27, 31\nName : Tom Sheehey, Reggie Lewis, Brad Lohaus, David Butler, Tim Naegeli, Darryl Kennedy, Jerry Corcoran\nNationality : United States, United Kindom\nGraduation_College : Wisconsin–Stevens Point, California, Virginia, Iowa, Northeastern, Oklahoma\n*/\n \nCREATE TABLE \"People_Addresses\" (\n\tperson_address_id INTEGER NOT NULL, \n\tperson_id INTEGER NOT NULL, \n\taddress_id INTEGER NOT NULL, \n\tdate_from DATETIME, \n\tdate_to DATETIME, \n\tPRIMARY KEY (person_address_id), \n\tFOREIGN KEY(person_id) REFERENCES \"People\" (person_id), \n\tFOREIGN KEY(address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in People_Addresses and all categories for low cardinality columns :\nperson_address_id : 269, 276, 281, 257, 363, 396, 122, 340\nperson_id : 151, 141, 131, 171, 111, 161, 181, 121\naddress_id : 45, 9, 5, 88, 55, 92, 82, 99\ndate_from : 2013-12-26 16:57:01, 2008-07-31 02:17:25, 2012-09-26 13:21:00, 2008-05-26 20:43:41, 2014-05-10 00:32:31, 2010-11-26 05:21:12, 2017-05-01 17:32:26, 2015-05-24 16:14:12\ndate_to : 2018-03-21 09:46:30, 2018-03-08 06:16:47, 2018-02-23 22:44:18, 2018-03-11 20:26:41, 2018-03-09 02:11:12, 2018-03-09 08:45:06, 2018-03-03 16:06:17, 2018-03-12 21:10:02\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Country\" TEXT, \n\t\"Is_Male\" TEXT, \n\t\"Age\" INTEGER, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and 3 examples in each column for high cardinality columns :\nPeople_ID : 1, 11, 2\nName : Summer Duval, Annie Mediate, Juli Hanson\n*/\n/*\nColumns in people and all categories for low cardinality columns :\nCountry : Sweden, Canada, United States, Australia, Northern Ireland\nIs_Male : T, F\nAge : 26, 25, 36, 30, 34, 32, 27, 31\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE budget (\n\t\"School_id\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Budgeted\" INTEGER, \n\ttotal_budget_percent_budgeted REAL, \n\t\"Invested\" INTEGER, \n\ttotal_budget_percent_invested REAL, \n\t\"Budget_invested_percent\" TEXT, \n\tPRIMARY KEY (\"School_id\", \"Year\"), \n\tFOREIGN KEY(\"School_id\") REFERENCES \"School\" (\"School_id\")\n)\n/*\nColumns in budget and all categories for low cardinality columns :\nSchool_id : 1, 2, 5, 4, 3\nYear : 2004, 2001, 2005, 1999, 2000, 2006, 2002, 2003\nBudgeted : 113531, 3666, 61283, 119527, 6803, 4979, 74849\ntotal_budget_percent_budgeted : 1.3, 2.0, 2.4\nInvested : 8335, 5840, 95542, 146102, 2134, 85249, 140102\ntotal_budget_percent_invested : 2.7, 2.2, 2.0\nBudget_invested_percent : 71.3, 228.6, 146.9, 127.6, 228.8, 122.5, 42.9, 159.3\n*/\n \nCREATE TABLE \"Roles\" (\n\t\"Role_Code\" CHAR(15) NOT NULL, \n\t\"Role_Name\" VARCHAR(255), \n\t\"Role_Description\" VARCHAR(255), \n\tPRIMARY KEY (\"Role_Code\")\n)\n/*\nColumns in Roles and all categories for low cardinality columns :\nRole_Code : PR, HR, PT, MG, ED\nRole_Name : Proof Reader, Human Resource, Editor, Photo, Manager\nRole_Description : Itaque dolor ut nemo rerum vitae provident. Vel laborum ipsum velit sint. Et est omnis dignissimos., Ut sed quae eaque mollitia qui hic. Natus ea expedita et odio illum fugiat qui natus. Consequatur velit ut dolorem cum ullam esse deserunt dignissimos. Enim non non rem officiis quis., Aut modi nihil molestias temporibus sit rerum. Sit neque eaque odio omnis incidunt., Vero harum corrupti odit ipsa vero et odio. Iste et recusandae temporibus maxime. Magni aspernatur fugit quis explicabo totam esse corrupti., Et totam est quibusdam aspernatur ut. Vitae perferendis eligendi voluptatem molestiae rem ut enim. Ipsum expedita quae earum unde est. Repellendus ut ipsam nihil accusantium sit. Magni accusantium numquam quod et.\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\tinvoice_date DATETIME, \n\tinvoice_details VARCHAR(255), \n\tPRIMARY KEY (invoice_number)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 1987-11-15 15:41:40, 1998-07-30 15:39:12, 1974-07-27 16:18:49\ninvoice_details : doloremque, eveniet, earum\n*/\n\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE club_rank (\n\t\"Rank\" REAL, \n\t\"Club_ID\" INTEGER, \n\t\"Gold\" REAL, \n\t\"Silver\" REAL, \n\t\"Bronze\" REAL, \n\t\"Total\" REAL, \n\tPRIMARY KEY (\"Rank\", \"Club_ID\"), \n\tFOREIGN KEY(\"Club_ID\") REFERENCES club (\"Club_ID\")\n)\n/*\nColumns in club_rank and all categories for low cardinality columns :\nRank : 3.0, 1.0, 2.0, 6.0, 4.0, 5.0\nClub_ID : 1, 2, 5, 4, 6, 3\nGold : 8.0, 3.0, 11.0, 7.0, 2.0, 4.0\nSilver : 3.0, 11.0, 1.0, 7.0, 2.0, 4.0\nBronze : 6.0, 9.0, 0.0, 2.0\nTotal : 3.0, 12.0, 13.0, 6.0, 21.0, 31.0\n*/\n \nCREATE TABLE \"Staff\" (\n\tstaff_id INTEGER, \n\tstaff_gender VARCHAR(1), \n\tstaff_name VARCHAR(80), \n\tPRIMARY KEY (staff_id)\n)\n/*\nColumns in Staff and 3 examples in each column for high cardinality columns :\nstaff_id : 1, 13, 11\nstaff_name : Yolanda, Tom, Melody\n*/\n/*\nColumns in Staff and all categories for low cardinality columns :\nstaff_gender : 1, 0\n*/\n \nCREATE TABLE \"Problem_Log\" (\n\tproblem_log_id INTEGER, \n\tassigned_to_staff_id INTEGER NOT NULL, \n\tproblem_id INTEGER NOT NULL, \n\tproblem_category_code VARCHAR(20) NOT NULL, \n\tproblem_status_code VARCHAR(20) NOT NULL, \n\tlog_entry_date DATETIME, \n\tlog_entry_description VARCHAR(255), \n\tlog_entry_fix VARCHAR(255), \n\tother_log_details VARCHAR(255), \n\tPRIMARY KEY (problem_log_id), \n\tFOREIGN KEY(problem_status_code) REFERENCES \"Problem_Status_Codes\" (problem_status_code), \n\tFOREIGN KEY(problem_id) REFERENCES \"Problems\" (problem_id), \n\tFOREIGN KEY(assigned_to_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(problem_category_code) REFERENCES \"Problem_Category_Codes\" (problem_category_code)\n)\n/*\nColumns in Problem_Log and 3 examples in each column for high cardinality columns :\nproblem_log_id : 1, 13, 11\nlog_entry_date : 1997-08-31 08:19:12, 1974-09-13 00:37:26, 2011-11-12 23:30:53\nlog_entry_description : t, d, b\nlog_entry_fix : t, i, s\nother_log_details : d, f, b\n*/\n/*\nColumns in Problem_Log and all categories for low cardinality columns :\nassigned_to_staff_id : 1, 13, 11, 2, 5, 8, 4, 6, 12, 10\nproblem_id : 1, 13, 11, 10, 7, 8, 4, 12, 15\nproblem_category_code : Datatabase, Middleware, GUI\nproblem_status_code : Solved, Reported\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\torder_id INTEGER NOT NULL, \n\tinvoice_date DATETIME, \n\tPRIMARY KEY (invoice_number), \n\tFOREIGN KEY(order_id) REFERENCES \"Orders\" (order_id)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 2018-03-16 21:57:43, 2018-03-20 00:21:41, 2018-03-05 08:47:33\n*/\n/*\nColumns in Invoices and all categories for low cardinality columns :\norder_id : 13, 11, 5, 8, 4, 12, 3, 14, 10, 9\n*/\n \nCREATE TABLE \"Customer_Events\" (\n\t\"Customer_Event_ID\" INTEGER NOT NULL, \n\tcustomer_id INTEGER, \n\tdate_moved_in DATETIME, \n\tproperty_id INTEGER, \n\tresident_id INTEGER, \n\tthing_id INTEGER NOT NULL, \n\tPRIMARY KEY (\"Customer_Event_ID\"), \n\tFOREIGN KEY(thing_id) REFERENCES \"Things\" (thing_id), \n\tFOREIGN KEY(customer_id) REFERENCES \"Customers\" (customer_id), \n\tFOREIGN KEY(resident_id, property_id, date_moved_in) REFERENCES \"Residents\" (resident_id, property_id, date_moved_in)\n)\n/*\nColumns in Customer_Events and 3 examples in each column for high cardinality columns :\nCustomer_Event_ID : 428, 216, 226\nresident_id : 87, 38, 37\n*/\n/*\nColumns in Customer_Events and all categories for low cardinality columns :\ncustomer_id : 79, 91, 16, 5, 4, 63, 19, 31\ndate_moved_in : 2016-09-27 12:00:00, 2015-03-27 12:00:00\nproperty_id : 954, 120, 108, 605, 629, 748, 669, 445, 879, 107\nthing_id : 1, 46, 10, 80, 2, 92, 14, 85, 68\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE TABLE discipline_enrollments (\n\t\"Campus\" INTEGER, \n\t\"Discipline\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Undergraduate\" INTEGER, \n\t\"Graduate\" INTEGER, \n\tPRIMARY KEY (\"Campus\", \"Discipline\"), \n\tFOREIGN KEY(\"Campus\") REFERENCES \"Campuses\" (\"Id\")\n)\n/*\nColumns in discipline_enrollments and 3 examples in each column for high cardinality columns :\nCampus : 1, 13, 11\nDiscipline : 19, 17, 21\nUndergraduate : 986, 65, 815\nGraduate : 56, 377, 187\n*/\n/*\nColumns in discipline_enrollments and all categories for low cardinality columns :\nYear : 2004\n*/\n \nCREATE TABLE country (\n\t\"Country_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Population\" INTEGER, \n\t\"Area\" INTEGER, \n\t\"Languages\" TEXT, \n\tPRIMARY KEY (\"Country_ID\")\n)\n/*\nColumns in country and all categories for low cardinality columns :\nCountry_ID : 1, 2, 3\nName : Austria, Sweden, Finland\nPopulation : 8206524, 9047752, 5261008\nArea : 83871, 338145, 449964\nLanguages : Swedish, Finnish Swedish, German\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n"}, {"role": "user", "content": "List the creation year, name and budget of each department."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: department.\nFrom these tables, I'll be using the following columns: creation, name, budget_in_billions.\nThe SQL query I'll be generating is:\nSELECT creation , name , budget_in_billions FROM department"}]} + +// Add more training inputs following the format above \ No newline at end of file diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py index 620f0001..b421541b 100644 --- a/hyperbolic_agentkit_core/actions/run_remote_finetune.py +++ b/hyperbolic_agentkit_core/actions/run_remote_finetune.py @@ -68,18 +68,20 @@ def _run( if isinstance(sync_result, dict) and not sync_result.get("success", False): return json.dumps({"status": "error", "message": sync_result.get("error", "Sync failed")}) - # Step 4: Execute setup and training commands in a single shell session + # Step 4: Execute setup, training, and test inference in a single shell session combined_command = ( "cd finetune_example && " "bash -c '" "python3 -m venv venv && " "source venv/bin/activate && " "pip install -r requirements.txt && " - f"FINE_TUNE_MODEL={model_name} python3 finetune.py" + f"FINE_TUNE_MODEL={model_name} python3 finetune.py && " + "python3 test_inference.py 'Which country has the highest population?'" "'" ) shell_result = ssh_manager.execute(combined_command) + # Check for the finetuned_model directory verify_result = ssh_manager.execute("test -d /home/ubuntu/finetune_example/finetuned_model && echo 'exists'") if "exists" not in str(verify_result): @@ -87,11 +89,19 @@ def _run( "status": "error", "message": f"Fine-tuning failed or directory not created. Output: {shell_result}" }) + + # Get the inference output + inference_output = ssh_manager.execute("cat /home/ubuntu/finetune_example/inference_output.json") + try: + inference_result = json.loads(inference_output) + except: + inference_result = {"error": "Failed to parse inference output"} return json.dumps({ "status": "success", "message": "Fine-tuning completed successfully", - "model_name": model_name + "model_name": model_name, + "test_inference_output": inference_result }) except Exception as e: @@ -249,6 +259,7 @@ def sync_to_remote() -> Dict[str, bool]: "./finetune_example/training_data.jsonl", "./finetune_example/requirements.txt", "./finetune_example/finetune.py", + "./finetune_example/test_inference.py" ] From bfeab922d640d54f76630001b1f578830b2dd703 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Thu, 13 Feb 2025 16:34:29 -0500 Subject: [PATCH 05/10] fix inference job dependencies --- finetune_example/requirements.txt | 16 +++++++++------- finetune_example/training_data.jsonl | 2 -- .../actions/run_remote_finetune.py | 17 ++++++++++++----- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/finetune_example/requirements.txt b/finetune_example/requirements.txt index aeb40a4e..c7a984a4 100644 --- a/finetune_example/requirements.txt +++ b/finetune_example/requirements.txt @@ -1,12 +1,14 @@ -unsloth -xformers -torch --index-url https://download.pytorch.org/whl/cu118 -torchvision --index-url https://download.pytorch.org/whl/cu118 -transformers +unsloth==2025.2.4 +unsloth-zoo==2025.2.3 +transformers==4.48.3 +trl==0.14.0 +xformers==0.0.28.post3 +torch==2.5.1 --index-url https://download.pytorch.org/whl/cu118 +torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cu118 accelerate bitsandbytes -trl datasets peft ninja -numpy<2.0 \ No newline at end of file +numpy<2.0 +vllm==0.7.2 \ No newline at end of file diff --git a/finetune_example/training_data.jsonl b/finetune_example/training_data.jsonl index d8027a8b..178de88d 100644 --- a/finetune_example/training_data.jsonl +++ b/finetune_example/training_data.jsonl @@ -1,5 +1,3 @@ {"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE trip (\n\tid INTEGER, \n\tduration INTEGER, \n\tstart_date TEXT, \n\tstart_station_name TEXT, \n\tstart_station_id INTEGER, \n\tend_date TEXT, \n\tend_station_name TEXT, \n\tend_station_id INTEGER, \n\tbike_id INTEGER, \n\tsubscription_type TEXT, \n\tzip_code INTEGER, \n\tPRIMARY KEY (id)\n)\n/*\nColumns in trip and 3 examples in each column for high cardinality columns :\nid : 900645, 900752, 900524\nduration : 1131, 2146, 1155\nstart_date : 8/21/2015 17:39, 8/21/2015 17:03, 8/21/2015 17:16\nstart_station_name : Howard at 2nd, 2nd at Folsom, Market at 10th\nstart_station_id : 56, 65, 49\nend_date : 8/21/2015 17:19, 8/21/2015 18:08, 8/21/2015 17:32\nend_station_name : Howard at 2nd, 2nd at Folsom, Market at 10th\nend_station_id : 56, 65, 49\nbike_id : 586, 56, 65\nzip_code : 94070, 94530, 94040-1724\n*/\n/*\nColumns in trip and all categories for low cardinality columns :\nsubscription_type : Customer, Subscriber\n*/\n \nCREATE TABLE \"Problems\" (\n\tproblem_id INTEGER, \n\tproduct_id INTEGER NOT NULL, \n\tclosure_authorised_by_staff_id INTEGER NOT NULL, \n\treported_by_staff_id INTEGER NOT NULL, \n\tdate_problem_reported DATETIME NOT NULL, \n\tdate_problem_closed DATETIME, \n\tproblem_description VARCHAR(255), \n\tother_problem_details VARCHAR(255), \n\tPRIMARY KEY (problem_id), \n\tFOREIGN KEY(reported_by_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(product_id) REFERENCES \"Product\" (product_id), \n\tFOREIGN KEY(closure_authorised_by_staff_id) REFERENCES \"Staff\" (staff_id)\n)\n/*\nColumns in Problems and 3 examples in each column for high cardinality columns :\nproblem_id : 1, 13, 11\nclosure_authorised_by_staff_id : 1, 13, 2\ndate_problem_reported : 1995-05-14 08:32:56, 1988-11-07 16:09:31, 1986-11-13 07:30:55\ndate_problem_closed : 1974-09-20 13:42:19, 1997-10-18 20:09:57, 2004-06-20 01:08:25\nproblem_description : d, i, s\n*/\n/*\nColumns in Problems and all categories for low cardinality columns :\nproduct_id : 1, 13, 2, 5, 7, 8, 4, 6, 15\nreported_by_staff_id : 1, 13, 11, 2, 5, 7, 4, 14, 10\nother_problem_details : f, m, i, s, k, l, p, v, c\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE category (\n\tcategory_id INTEGER NOT NULL, \n\tname VARCHAR(25) NOT NULL, \n\tlast_update TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, \n\tPRIMARY KEY (category_id)\n)\n/*\nColumns in category and 3 examples in each column for high cardinality columns :\ncategory_id : 1, 16, 13\nname : Family, Sci-Fi, Action\n*/\n/*\nColumns in category and all categories for low cardinality columns :\nlast_update : 2006-02-15 04:46:27\n*/\n \nCREATE TABLE ship (\n\t\"Ship_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Type\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Tonnage\" INTEGER, \n\tPRIMARY KEY (\"Ship_ID\")\n)\n/*\nColumns in ship and all categories for low cardinality columns :\nShip_ID : 1, 2, 5, 7, 8, 4, 6, 3\nName : Clan McTavish, Farringford, Appam, Author, Dromonby, Corbridge, Trader, Ariadne\nType : Battle ship, Cargo ship\nNationality : United States, United Kingdom\nTonnage : 3035, 3146, 7781, 3496, 3687, 5816, 3627, 3608\n*/\n \nCREATE TABLE member_attendance (\n\t\"Member_ID\" INTEGER, \n\t\"Performance_ID\" INTEGER, \n\t\"Num_of_Pieces\" INTEGER, \n\tPRIMARY KEY (\"Member_ID\", \"Performance_ID\"), \n\tFOREIGN KEY(\"Performance_ID\") REFERENCES performance (\"Performance_ID\"), \n\tFOREIGN KEY(\"Member_ID\") REFERENCES member (\"Member_ID\")\n)\n/*\nColumns in member_attendance and all categories for low cardinality columns :\nMember_ID : 1, 11, 2, 5, 7, 4, 3\nPerformance_ID : 1, 2, 4, 6, 3\nNum_of_Pieces : 1, 2, 4, 3\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE chip_model (\n\t\"Model_name\" TEXT, \n\t\"Launch_year\" REAL, \n\t\"RAM_MiB\" REAL, \n\t\"ROM_MiB\" REAL, \n\t\"Slots\" TEXT, \n\t\"WiFi\" TEXT, \n\t\"Bluetooth\" TEXT, \n\tPRIMARY KEY (\"Model_name\")\n)\n/*\nColumns in chip_model and 3 examples in each column for high cardinality columns :\nModel_name : X30 mid-range, X50 Advanced, X51 mid-range\n*/\n/*\nColumns in chip_model and all categories for low cardinality columns :\nLaunch_year : 2002.0, 2005.0, 2004.0, 2003.0\nRAM_MiB : 32.0, 64.0\nROM_MiB : 48.0, 256.0, 128.0, 32.0, 64.0\nSlots : 1CFII,1SD, 1SD\nWiFi : 802.11b, No\nBluetooth : 1.2, Yes, No, 1.1\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE mountain (\n\t\"Mountain_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Height\" REAL, \n\t\"Prominence\" REAL, \n\t\"Range\" TEXT, \n\t\"Country\" TEXT, \n\tPRIMARY KEY (\"Mountain_ID\")\n)\n/*\nColumns in mountain and all categories for low cardinality columns :\nMountain_ID : 1, 2, 5, 7, 4, 6, 3\nName : Ngaliema / Mt Stanley (Margherita Pk), Mount Kenya (Lenana), Kibo (Uhuru Pk), Ngaliema / Mt Stanley (Savoia Pk), Mount Kenya (Batian), Duwoni / Mt Speke (Vittorio Emanuele Pk), Mawenzi (Hans Meyer Pk)\nHeight : 5109.0, 5199.0, 5895.0, 4890.0, 4985.0, 4977.0, 5148.0\nProminence : 720.0, 850.0, 3951.0, 3825.0, 130.0, 5885.0, 110.0\nRange : Kilimanjaro, Mount Kenya, Rwenzori\nCountry : DR Congo Uganda, Uganda, Tanzania, Kenya\n*/\n \nCREATE TABLE \"Restaurant_Type\" (\n\t\"ResTypeID\" INTEGER, \n\t\"ResTypeName\" VARCHAR(40), \n\t\"ResTypeDescription\" VARCHAR(100), \n\tPRIMARY KEY (\"ResTypeID\")\n)\n/*\nColumns in Restaurant_Type and all categories for low cardinality columns :\nResTypeID : 1, 2\nResTypeName : Sandwich, Stir-fry\nResTypeDescription : Classic Chinese cooking., Simplest there is.\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE \"Country\" (\n\tid INTEGER, \n\tname TEXT, \n\tPRIMARY KEY (id)\n)\n/*\nColumns in Country and 3 examples in each column for high cardinality columns :\nid : 1, 19694, 7809\nname : Scotland, Italy, Spain\n*/\n\n \nCREATE TABLE artist (\n\tartist_name TEXT(50) NOT NULL, \n\tcountry TEXT(20), \n\tgender TEXT(20), \n\tpreferred_genre TEXT(50), \n\tCONSTRAINT a_name PRIMARY KEY (artist_name), \n\tFOREIGN KEY(preferred_genre) REFERENCES genre (g_name) ON DELETE CASCADE\n)\n/*\nColumns in artist and all categories for low cardinality columns :\nartist_name : Prity, Michel, Topu, Shrikanta, Enrique, Farida\ncountry : India, UK, USA, Bangladesh\ngender : Male, Female\npreferred_genre : tagore, folk, modern, nazrul, blues, pop\n*/\n \nCREATE TABLE \"Organizations\" (\n\torganization_id INTEGER NOT NULL, \n\tparent_organization_id INTEGER, \n\torganization_details VARCHAR(255), \n\tPRIMARY KEY (organization_id)\n)\n/*\nColumns in Organizations and all categories for low cardinality columns :\norganization_id : 7, 8, 10\nparent_organization_id : 7, 8\norganization_details : Denesik and Sons Party, Reinger, Hudson and Nolan Group, Robel-Schulist Group\n*/\n \nCREATE TABLE school (\n\t\"School_ID\" INTEGER, \n\t\"School\" TEXT, \n\t\"Location\" TEXT, \n\t\"Enrollment\" REAL, \n\t\"Founded\" REAL, \n\t\"Denomination\" TEXT, \n\t\"Boys_or_Girls\" TEXT, \n\t\"Day_or_Boarding\" TEXT, \n\t\"Year_Entered_Competition\" REAL, \n\t\"School_Colors\" TEXT, \n\tPRIMARY KEY (\"School_ID\")\n)\n/*\nColumns in school and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nSchool : St Aloysius' College, Cranbrook School, Waverley College, Knox Grammar School, Barker College, Trinity Grammar School\nLocation : Hornsby, Summer Hill, Waverley, Bellevue Hill, Milsons Point, Wahroonga\nEnrollment : 1000.0, 1850.0, 2200.0, 1200.0, 2300.0, 1430.0\nFounded : 1918.0, 1924.0, 1913.0, 1879.0, 1903.0, 1890.0\nDenomination : Catholic, Uniting Church, Anglican\nBoys_or_Girls : Boys only to Yr 9 Co-ed Year 10 to 12, Boys\nDay_or_Boarding : Day, Day & Boarding\nYear_Entered_Competition : 1944.0, 1929.0\nSchool_Colors : Royal Blue and Gold, Black & Blue, Red, White & Blue, Red & Blue, Green and White\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE TABLE journalist (\n\t\"journalist_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Age\" TEXT, \n\t\"Years_working\" INTEGER, \n\tPRIMARY KEY (\"journalist_ID\")\n)\n/*\nColumns in journalist and 3 examples in each column for high cardinality columns :\njournalist_ID : 1, 11, 2\nName : Jack Meaney, Herbert Swindells, Jackie Waring\n*/\n/*\nColumns in journalist and all categories for low cardinality columns :\nNationality : Northern Ireland, Wales, England\nAge : 37, 28, 25, 33, 34, 43, 27, 29\nYears_working : 1, 5, 7, 8, 21, 6, 3, 12, 10, 9\n*/\n \nCREATE TABLE list (\n\t\"LastName\" TEXT, \n\t\"FirstName\" TEXT, \n\t\"Grade\" INTEGER, \n\t\"Classroom\" INTEGER, \n\tPRIMARY KEY (\"LastName\", \"FirstName\")\n)\n/*\nColumns in list and 3 examples in each column for high cardinality columns :\nLastName : HOUTCHENS, GELL, FLACHS\nFirstName : RAY, EMILE, PATRINA\nClassroom : 109, 110, 106\n*/\n/*\nColumns in list and all categories for low cardinality columns :\nGrade : 1, 2, 5, 4, 6, 3, 0\n*/\n"}, {"role": "user", "content": "How many heads of the departments are older than 56 ?"}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: age.\nThe SQL query I'll be generating is:\nSELECT count(*) FROM head WHERE age > 56"}]} {"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE \"Documents_with_Expenses\" (\n\t\"Document_ID\" INTEGER NOT NULL, \n\t\"Budget_Type_Code\" CHAR(15) NOT NULL, \n\t\"Document_Details\" VARCHAR(255), \n\tPRIMARY KEY (\"Document_ID\"), \n\tFOREIGN KEY(\"Budget_Type_Code\") REFERENCES \"Ref_Budget_Codes\" (\"Budget_Type_Code\"), \n\tFOREIGN KEY(\"Document_ID\") REFERENCES \"Documents\" (\"Document_ID\")\n)\n/*\nColumns in Documents_with_Expenses and all categories for low cardinality columns :\nDocument_ID : 300, 240, 309, 226, 367, 371, 383, 227, 192, 57\nBudget_Type_Code : GV, SF, ORG\nDocument_Details : government, safety, organization\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_address_id INTEGER NOT NULL, \n\tcustomer_status_code VARCHAR(15) NOT NULL, \n\tdate_became_customer DATETIME, \n\tdate_of_birth DATETIME, \n\tfirst_name VARCHAR(80), \n\tlast_name VARCHAR(80), \n\tamount_outstanding DOUBLE, \n\temail_address VARCHAR(250), \n\tphone_number VARCHAR(255), \n\tcell_mobile_phone_number VARCHAR(255), \n\tPRIMARY KEY (customer_id), \n\tFOREIGN KEY(customer_address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ndate_became_customer : 2016-03-18 06:14:18, 2016-05-13 07:38:23, 2017-10-05 14:15:46\ndate_of_birth : 1994-05-07 01:32:16, 1991-12-05 02:50:15, 1998-09-18 04:45:01\nfirst_name : Millie, Dianna, Carole\nlast_name : Schmeler, Tromp, Goodwin\namount_outstanding : 7093.0, 8144.0, 9447.0\nemail_address : mariela28@example.org, wehner.harold@example.com, afranecki@example.com\nphone_number : 1-618-535-9750, 218.550.1362, 753.921.0871\ncell_mobile_phone_number : 578-518-4785x612, (780)719-4206x033, (630)964-2426\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\ncustomer_address_id : 1, 13, 2, 7, 8, 12, 10, 9\ncustomer_status_code : Bad Customer, Good Customer\n*/\n \nCREATE TABLE \"Instruments\" (\n\t\"SongId\" INTEGER, \n\t\"BandmateId\" INTEGER, \n\t\"Instrument\" TEXT, \n\tPRIMARY KEY (\"SongId\", \"BandmateId\", \"Instrument\"), \n\tFOREIGN KEY(\"SongId\") REFERENCES \"Songs\" (\"SongId\"), \n\tFOREIGN KEY(\"BandmateId\") REFERENCES \"Band\" (\"Id\")\n)\n/*\nColumns in Instruments and all categories for low cardinality columns :\nSongId : \nBandmateId : \nInstrument : \n*/\n \nCREATE TABLE \"Apartments\" (\n\tapt_id INTEGER NOT NULL, \n\tbuilding_id INTEGER NOT NULL, \n\tapt_type_code CHAR(15), \n\tapt_number CHAR(10), \n\tbathroom_count INTEGER, \n\tbedroom_count INTEGER, \n\troom_count CHAR(5), \n\tPRIMARY KEY (apt_id), \n\tFOREIGN KEY(building_id) REFERENCES \"Apartment_Buildings\" (building_id), \n\tUNIQUE (apt_id)\n)\n/*\nColumns in Apartments and 3 examples in each column for high cardinality columns :\napt_id : 1, 13, 11\napt_number : Suite 307, Apt. 411, Apt. 585\n*/\n/*\nColumns in Apartments and all categories for low cardinality columns :\nbuilding_id : 153, 673, 808, 744, 624, 790, 191, 734, 225\napt_type_code : Studio, Flat, Duplex\nbathroom_count : 1, 2, 3\nbedroom_count : 5, 4, 6, 3\nroom_count : 5, 7, 8, 6, 3, 9\n*/\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_first_name VARCHAR(20), \n\tcustomer_last_name VARCHAR(20), \n\tcustomer_address VARCHAR(255), \n\tcustomer_phone VARCHAR(255), \n\tcustomer_email VARCHAR(255), \n\tother_customer_details VARCHAR(255), \n\tPRIMARY KEY (customer_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ncustomer_first_name : Armani, Bryce, Nat\ncustomer_last_name : Swift, Lind, Huels\ncustomer_address : 6478 Moen Isle Suite 910\nSchimmelmouth, VT 96364-4898, 47286 Mraz Park Apt. 424\nJocelynfurt, OH 59023-2787, 094 Julianne Mill\nWebstertown, KY 91980-4004\ncustomer_phone : (415)237-0701x3115, 679-845-8645x94312, 284.749.0453\ncustomer_email : kyra.murazik@example.org, schaden.katrina@example.net, huels.antonina@example.com\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\nother_customer_details : None\n*/\n \nCREATE TABLE \"Guests\" (\n\tguest_id INTEGER NOT NULL, \n\tgender_code CHAR(1), \n\tguest_first_name VARCHAR(80), \n\tguest_last_name VARCHAR(80), \n\tdate_of_birth DATETIME, \n\tPRIMARY KEY (guest_id), \n\tUNIQUE (guest_id)\n)\n/*\nColumns in Guests and 3 examples in each column for high cardinality columns :\nguest_id : 1, 13, 11\nguest_first_name : Louvenia, Lou, Keith\nguest_last_name : Bode, Mante, Schowalter\ndate_of_birth : 2001-06-18 20:05:55, 1980-06-20 18:15:39, 1997-01-15 17:37:40\n*/\n/*\nColumns in Guests and all categories for low cardinality columns :\ngender_code : Male, Unknown, Female\n*/\n \nCREATE TABLE school_details (\n\t\"School_ID\" INTEGER, \n\t\"Nickname\" TEXT, \n\t\"Colors\" TEXT, \n\t\"League\" TEXT, \n\t\"Class\" TEXT, \n\t\"Division\" TEXT, \n\tPRIMARY KEY (\"School_ID\"), \n\tFOREIGN KEY(\"School_ID\") REFERENCES school (\"School_ID\")\n)\n/*\nColumns in school_details and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nNickname : Auks, Raiders, Silver Eagles, Seahawks, Buccaneers, Tigers\nColors : Dark Green and White, Blue and Yellow, Maroon and White, Blue and Gold, Garnet and White, Silver and Maroon\nLeague : DSHA\nClass : Flight B, Flight A, Fight A\nDivision : Division 1, Division 2, Division 3\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Sex\" TEXT, \n\t\"Name\" TEXT, \n\t\"Date_of_Birth\" TEXT, \n\t\"Height\" REAL, \n\t\"Weight\" REAL, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 8, 4, 6, 3, 9\nSex : F, M\nName : Vincent Montméat, Frantz Granvorka, Loïc De Kergret, Philippe Barça-Cysique, Guillaume Samica, Stéphane Antiga, Dominique Daquin, Laurent Capet, Hubert Henno\nDate_of_Birth : 20.08.1970, 10.03.1976, 05.05.1972, 28.09.1981, 22.04.1977, 01.09.1977, 10.11.1972, 06.10.1976, 03.02.1976\nHeight : 196.0, 195.0, 197.0, 188.0, 193.0, 200.0, 202.0, 194.0\nWeight : 89.0, 82.0, 85.0, 88.0, 90.0, 92.0, 83.0, 94.0\n*/\n \nCREATE TABLE protein (\n\tcommon_name TEXT, \n\tprotein_name TEXT, \n\tdivergence_from_human_lineage REAL, \n\taccession_number TEXT, \n\tsequence_length REAL, \n\tsequence_identity_to_human_protein TEXT, \n\t\"Institution_id\" TEXT, \n\tPRIMARY KEY (common_name), \n\tFOREIGN KEY(\"Institution_id\") REFERENCES \"Institution\" (\"Institution_id\")\n)\n/*\nColumns in protein and all categories for low cardinality columns :\ncommon_name : Tropical Clawed Frog, porkworm, body louse, southern house mosquito, purple sea urchin\nprotein_name : uncharacterized protein LOC578090, conserved hypothetical protein, Centromeric protein E, putative, surface antigen repeat family protein, uncharacterized protein C20orf117-like\ndivergence_from_human_lineage : 937.5, 742.9, 782.7, 371.2\naccession_number : XP_003380263.1, XP_002942331.1, XP_783370.2, XP_002429877.1, XP_001843754.1\nsequence_length : 2030.0, 1587.0, 2086.0, 1584.0, 1878.0\nsequence_identity_to_human_protein : 30%, 47%, 36%, 39%, 32%\nInstitution_id : 1, 5, 9, 3\n*/\n \nCREATE TABLE \"Products\" (\n\tproduct_id INTEGER, \n\tcolor_code VARCHAR(15) NOT NULL, \n\tproduct_category_code VARCHAR(15) NOT NULL, \n\tproduct_name VARCHAR(80), \n\ttypical_buying_price VARCHAR(20), \n\ttypical_selling_price VARCHAR(20), \n\tproduct_description VARCHAR(255), \n\tother_product_details VARCHAR(255), \n\tPRIMARY KEY (product_id), \n\tFOREIGN KEY(color_code) REFERENCES \"Ref_Colors\" (color_code), \n\tFOREIGN KEY(product_category_code) REFERENCES \"Ref_Product_Categories\" (product_category_code)\n)\n/*\nColumns in Products and 3 examples in each column for high cardinality columns :\nproduct_id : 1, 13, 11\nproduct_name : peper, cumin, chili\ntypical_buying_price : , 5.578, 503.8431967\ntypical_selling_price : , 149, 10377614.847385\nproduct_description : ut, et, vel\n*/\n/*\nColumns in Products and all categories for low cardinality columns :\ncolor_code : 1, 2, 5, 7, 4, 9\nproduct_category_code : Spices, Seeds, Herbs\nother_product_details : None\n*/\n \nCREATE TABLE film_text (\n\tfilm_id SMALLINT NOT NULL, \n\ttitle VARCHAR(255) NOT NULL, \n\tdescription TEXT, \n\tPRIMARY KEY (film_id)\n)\n/*\nColumns in film_text and all categories for low cardinality columns :\nfilm_id : \ntitle : \ndescription : \n*/\n \nCREATE TABLE \"Student\" (\n\t\"StuID\" INTEGER, \n\t\"LName\" VARCHAR(12), \n\t\"Fname\" VARCHAR(12), \n\t\"Age\" INTEGER, \n\t\"Sex\" VARCHAR(1), \n\t\"Major\" INTEGER, \n\t\"Advisor\" INTEGER, \n\tcity_code VARCHAR(3), \n\tPRIMARY KEY (\"StuID\")\n)\n/*\nColumns in Student and 3 examples in each column for high cardinality columns :\nStuID : 1003, 1017, 1002\nLName : Lee, Han, Brody\nFname : Tracy, Michael, Shiela\nAdvisor : 7723, 7712, 7134\ncity_code : ATL, HOU, LON\n*/\n/*\nColumns in Student and all categories for low cardinality columns :\nAge : 26, 16, 20, 17, 21, 19, 18, 27, 22\nSex : F, M\nMajor : 520, 100, 50, 600, 540, 550\n*/\n \nCREATE TABLE pilot (\n\t\"Pilot_ID\" INTEGER, \n\t\"Pilot_name\" TEXT, \n\t\"Rank\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Nationality\" TEXT, \n\t\"Position\" TEXT, \n\t\"Join_Year\" INTEGER, \n\t\"Team\" TEXT, \n\tPRIMARY KEY (\"Pilot_ID\")\n)\n/*\nColumns in pilot and all categories for low cardinality columns :\nPilot_ID : 1, 2, 5, 4, 3\nPilot_name : Patrick O'Bryant, Charles Oakley, Dan O'Sullivan, Hakeem Olajuwon, Jermaine O'Neal\nRank : 34, 13, 45, 6\nAge : 37, 33, 40, 32, 22\nNationality : United States, Nigeria, United Kindom\nPosition : Center Team, Forward Team, Forward-Center Team\nJoin_Year : 2009, 2001, 2008, 1999, 2010\nTeam : Fordham, Eau Claire High School, Bradley, Virginia Union, Houston\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n"}, {"role": "user", "content": "List the name, born state and age of the heads of departments ordered by age."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: name, born_state, age.\nThe SQL query I'll be generating is:\nSELECT name , born_state , age FROM head ORDER BY age"}]} {"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Graduation_College\" TEXT, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 4, 6, 3\nAge : 37, 25, 30, 34, 27, 31\nName : Tom Sheehey, Reggie Lewis, Brad Lohaus, David Butler, Tim Naegeli, Darryl Kennedy, Jerry Corcoran\nNationality : United States, United Kindom\nGraduation_College : Wisconsin–Stevens Point, California, Virginia, Iowa, Northeastern, Oklahoma\n*/\n \nCREATE TABLE \"People_Addresses\" (\n\tperson_address_id INTEGER NOT NULL, \n\tperson_id INTEGER NOT NULL, \n\taddress_id INTEGER NOT NULL, \n\tdate_from DATETIME, \n\tdate_to DATETIME, \n\tPRIMARY KEY (person_address_id), \n\tFOREIGN KEY(person_id) REFERENCES \"People\" (person_id), \n\tFOREIGN KEY(address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in People_Addresses and all categories for low cardinality columns :\nperson_address_id : 269, 276, 281, 257, 363, 396, 122, 340\nperson_id : 151, 141, 131, 171, 111, 161, 181, 121\naddress_id : 45, 9, 5, 88, 55, 92, 82, 99\ndate_from : 2013-12-26 16:57:01, 2008-07-31 02:17:25, 2012-09-26 13:21:00, 2008-05-26 20:43:41, 2014-05-10 00:32:31, 2010-11-26 05:21:12, 2017-05-01 17:32:26, 2015-05-24 16:14:12\ndate_to : 2018-03-21 09:46:30, 2018-03-08 06:16:47, 2018-02-23 22:44:18, 2018-03-11 20:26:41, 2018-03-09 02:11:12, 2018-03-09 08:45:06, 2018-03-03 16:06:17, 2018-03-12 21:10:02\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Country\" TEXT, \n\t\"Is_Male\" TEXT, \n\t\"Age\" INTEGER, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and 3 examples in each column for high cardinality columns :\nPeople_ID : 1, 11, 2\nName : Summer Duval, Annie Mediate, Juli Hanson\n*/\n/*\nColumns in people and all categories for low cardinality columns :\nCountry : Sweden, Canada, United States, Australia, Northern Ireland\nIs_Male : T, F\nAge : 26, 25, 36, 30, 34, 32, 27, 31\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE budget (\n\t\"School_id\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Budgeted\" INTEGER, \n\ttotal_budget_percent_budgeted REAL, \n\t\"Invested\" INTEGER, \n\ttotal_budget_percent_invested REAL, \n\t\"Budget_invested_percent\" TEXT, \n\tPRIMARY KEY (\"School_id\", \"Year\"), \n\tFOREIGN KEY(\"School_id\") REFERENCES \"School\" (\"School_id\")\n)\n/*\nColumns in budget and all categories for low cardinality columns :\nSchool_id : 1, 2, 5, 4, 3\nYear : 2004, 2001, 2005, 1999, 2000, 2006, 2002, 2003\nBudgeted : 113531, 3666, 61283, 119527, 6803, 4979, 74849\ntotal_budget_percent_budgeted : 1.3, 2.0, 2.4\nInvested : 8335, 5840, 95542, 146102, 2134, 85249, 140102\ntotal_budget_percent_invested : 2.7, 2.2, 2.0\nBudget_invested_percent : 71.3, 228.6, 146.9, 127.6, 228.8, 122.5, 42.9, 159.3\n*/\n \nCREATE TABLE \"Roles\" (\n\t\"Role_Code\" CHAR(15) NOT NULL, \n\t\"Role_Name\" VARCHAR(255), \n\t\"Role_Description\" VARCHAR(255), \n\tPRIMARY KEY (\"Role_Code\")\n)\n/*\nColumns in Roles and all categories for low cardinality columns :\nRole_Code : PR, HR, PT, MG, ED\nRole_Name : Proof Reader, Human Resource, Editor, Photo, Manager\nRole_Description : Itaque dolor ut nemo rerum vitae provident. Vel laborum ipsum velit sint. Et est omnis dignissimos., Ut sed quae eaque mollitia qui hic. Natus ea expedita et odio illum fugiat qui natus. Consequatur velit ut dolorem cum ullam esse deserunt dignissimos. Enim non non rem officiis quis., Aut modi nihil molestias temporibus sit rerum. Sit neque eaque odio omnis incidunt., Vero harum corrupti odit ipsa vero et odio. Iste et recusandae temporibus maxime. Magni aspernatur fugit quis explicabo totam esse corrupti., Et totam est quibusdam aspernatur ut. Vitae perferendis eligendi voluptatem molestiae rem ut enim. Ipsum expedita quae earum unde est. Repellendus ut ipsam nihil accusantium sit. Magni accusantium numquam quod et.\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\tinvoice_date DATETIME, \n\tinvoice_details VARCHAR(255), \n\tPRIMARY KEY (invoice_number)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 1987-11-15 15:41:40, 1998-07-30 15:39:12, 1974-07-27 16:18:49\ninvoice_details : doloremque, eveniet, earum\n*/\n\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE club_rank (\n\t\"Rank\" REAL, \n\t\"Club_ID\" INTEGER, \n\t\"Gold\" REAL, \n\t\"Silver\" REAL, \n\t\"Bronze\" REAL, \n\t\"Total\" REAL, \n\tPRIMARY KEY (\"Rank\", \"Club_ID\"), \n\tFOREIGN KEY(\"Club_ID\") REFERENCES club (\"Club_ID\")\n)\n/*\nColumns in club_rank and all categories for low cardinality columns :\nRank : 3.0, 1.0, 2.0, 6.0, 4.0, 5.0\nClub_ID : 1, 2, 5, 4, 6, 3\nGold : 8.0, 3.0, 11.0, 7.0, 2.0, 4.0\nSilver : 3.0, 11.0, 1.0, 7.0, 2.0, 4.0\nBronze : 6.0, 9.0, 0.0, 2.0\nTotal : 3.0, 12.0, 13.0, 6.0, 21.0, 31.0\n*/\n \nCREATE TABLE \"Staff\" (\n\tstaff_id INTEGER, \n\tstaff_gender VARCHAR(1), \n\tstaff_name VARCHAR(80), \n\tPRIMARY KEY (staff_id)\n)\n/*\nColumns in Staff and 3 examples in each column for high cardinality columns :\nstaff_id : 1, 13, 11\nstaff_name : Yolanda, Tom, Melody\n*/\n/*\nColumns in Staff and all categories for low cardinality columns :\nstaff_gender : 1, 0\n*/\n \nCREATE TABLE \"Problem_Log\" (\n\tproblem_log_id INTEGER, \n\tassigned_to_staff_id INTEGER NOT NULL, \n\tproblem_id INTEGER NOT NULL, \n\tproblem_category_code VARCHAR(20) NOT NULL, \n\tproblem_status_code VARCHAR(20) NOT NULL, \n\tlog_entry_date DATETIME, \n\tlog_entry_description VARCHAR(255), \n\tlog_entry_fix VARCHAR(255), \n\tother_log_details VARCHAR(255), \n\tPRIMARY KEY (problem_log_id), \n\tFOREIGN KEY(problem_status_code) REFERENCES \"Problem_Status_Codes\" (problem_status_code), \n\tFOREIGN KEY(problem_id) REFERENCES \"Problems\" (problem_id), \n\tFOREIGN KEY(assigned_to_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(problem_category_code) REFERENCES \"Problem_Category_Codes\" (problem_category_code)\n)\n/*\nColumns in Problem_Log and 3 examples in each column for high cardinality columns :\nproblem_log_id : 1, 13, 11\nlog_entry_date : 1997-08-31 08:19:12, 1974-09-13 00:37:26, 2011-11-12 23:30:53\nlog_entry_description : t, d, b\nlog_entry_fix : t, i, s\nother_log_details : d, f, b\n*/\n/*\nColumns in Problem_Log and all categories for low cardinality columns :\nassigned_to_staff_id : 1, 13, 11, 2, 5, 8, 4, 6, 12, 10\nproblem_id : 1, 13, 11, 10, 7, 8, 4, 12, 15\nproblem_category_code : Datatabase, Middleware, GUI\nproblem_status_code : Solved, Reported\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\torder_id INTEGER NOT NULL, \n\tinvoice_date DATETIME, \n\tPRIMARY KEY (invoice_number), \n\tFOREIGN KEY(order_id) REFERENCES \"Orders\" (order_id)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 2018-03-16 21:57:43, 2018-03-20 00:21:41, 2018-03-05 08:47:33\n*/\n/*\nColumns in Invoices and all categories for low cardinality columns :\norder_id : 13, 11, 5, 8, 4, 12, 3, 14, 10, 9\n*/\n \nCREATE TABLE \"Customer_Events\" (\n\t\"Customer_Event_ID\" INTEGER NOT NULL, \n\tcustomer_id INTEGER, \n\tdate_moved_in DATETIME, \n\tproperty_id INTEGER, \n\tresident_id INTEGER, \n\tthing_id INTEGER NOT NULL, \n\tPRIMARY KEY (\"Customer_Event_ID\"), \n\tFOREIGN KEY(thing_id) REFERENCES \"Things\" (thing_id), \n\tFOREIGN KEY(customer_id) REFERENCES \"Customers\" (customer_id), \n\tFOREIGN KEY(resident_id, property_id, date_moved_in) REFERENCES \"Residents\" (resident_id, property_id, date_moved_in)\n)\n/*\nColumns in Customer_Events and 3 examples in each column for high cardinality columns :\nCustomer_Event_ID : 428, 216, 226\nresident_id : 87, 38, 37\n*/\n/*\nColumns in Customer_Events and all categories for low cardinality columns :\ncustomer_id : 79, 91, 16, 5, 4, 63, 19, 31\ndate_moved_in : 2016-09-27 12:00:00, 2015-03-27 12:00:00\nproperty_id : 954, 120, 108, 605, 629, 748, 669, 445, 879, 107\nthing_id : 1, 46, 10, 80, 2, 92, 14, 85, 68\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE TABLE discipline_enrollments (\n\t\"Campus\" INTEGER, \n\t\"Discipline\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Undergraduate\" INTEGER, \n\t\"Graduate\" INTEGER, \n\tPRIMARY KEY (\"Campus\", \"Discipline\"), \n\tFOREIGN KEY(\"Campus\") REFERENCES \"Campuses\" (\"Id\")\n)\n/*\nColumns in discipline_enrollments and 3 examples in each column for high cardinality columns :\nCampus : 1, 13, 11\nDiscipline : 19, 17, 21\nUndergraduate : 986, 65, 815\nGraduate : 56, 377, 187\n*/\n/*\nColumns in discipline_enrollments and all categories for low cardinality columns :\nYear : 2004\n*/\n \nCREATE TABLE country (\n\t\"Country_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Population\" INTEGER, \n\t\"Area\" INTEGER, \n\t\"Languages\" TEXT, \n\tPRIMARY KEY (\"Country_ID\")\n)\n/*\nColumns in country and all categories for low cardinality columns :\nCountry_ID : 1, 2, 3\nName : Austria, Sweden, Finland\nPopulation : 8206524, 9047752, 5261008\nArea : 83871, 338145, 449964\nLanguages : Swedish, Finnish Swedish, German\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n"}, {"role": "user", "content": "List the creation year, name and budget of each department."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: department.\nFrom these tables, I'll be using the following columns: creation, name, budget_in_billions.\nThe SQL query I'll be generating is:\nSELECT creation , name , budget_in_billions FROM department"}]} - -// Add more training inputs following the format above \ No newline at end of file diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py index b421541b..6f2e18cf 100644 --- a/hyperbolic_agentkit_core/actions/run_remote_finetune.py +++ b/hyperbolic_agentkit_core/actions/run_remote_finetune.py @@ -57,10 +57,17 @@ def _run( return json.dumps({"status": "error", "message": f"Failed to connect to remote GPU: {ssh_result}"}) # Step 2: Setup remote environment - # Install rsync - shell_result = ssh_manager.execute("sudo apt-get update && sudo apt-get install -y rsync") - if isinstance(shell_result, str) and "error" in shell_result.lower(): - return json.dumps({"status": "error", "message": f"Failed to install rsync: {shell_result}"}) + # Install required system packages + shell_result = ssh_manager.execute( + "sudo apt-get update && " + "sudo apt-get install -y rsync python3-dev python3-pip build-essential libcurl4-openssl-dev" + "ninja-build git cmake pkg-config nano" + ) + # Only check for critical errors, ignore apt-utils warning + if isinstance(shell_result, str) and ("E: Unable to locate package" in shell_result or + "E: Failed to fetch" in shell_result or + "E: Could not install" in shell_result): + return json.dumps({"status": "error", "message": f"Failed to install required packages: {shell_result}"}) # Step 3: Sync files to remote GPU @@ -76,7 +83,7 @@ def _run( "source venv/bin/activate && " "pip install -r requirements.txt && " f"FINE_TUNE_MODEL={model_name} python3 finetune.py && " - "python3 test_inference.py 'Which country has the highest population?'" + "python3 test_inference.py 'How old is Tiger Woods?'" "'" ) From e9e1ebecf099c10747b9f4fb136dd5f88b4cb01f Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Thu, 13 Feb 2025 16:57:16 -0500 Subject: [PATCH 06/10] remove unnecessary remote installs --- hyperbolic_agentkit_core/actions/run_remote_finetune.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py index 6f2e18cf..a1608727 100644 --- a/hyperbolic_agentkit_core/actions/run_remote_finetune.py +++ b/hyperbolic_agentkit_core/actions/run_remote_finetune.py @@ -60,8 +60,7 @@ def _run( # Install required system packages shell_result = ssh_manager.execute( "sudo apt-get update && " - "sudo apt-get install -y rsync python3-dev python3-pip build-essential libcurl4-openssl-dev" - "ninja-build git cmake pkg-config nano" + "sudo apt-get install -y rsync python3-dev python3-pip build-essential git cmake pkg-config nano" ) # Only check for critical errors, ignore apt-utils warning if isinstance(shell_result, str) and ("E: Unable to locate package" in shell_result or From cad1c8a2f35ca6c7fa57502f6cfcb12e47e287cf Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Sun, 23 Feb 2025 23:09:38 -0500 Subject: [PATCH 07/10] clean up fine tune example for PR --- .env.example | 4 +- chatbot.py | 6 - finetune_example/README.md | 28 ++ hyperbolic_agentkit_core/actions/__init__.py | 3 +- .../actions/remote_finetune_example.py | 208 +++++++++++ .../actions/run_remote_finetune.py | 327 ------------------ .../agent_toolkits/hyperbolic_toolkit.py | 1 + 7 files changed, 242 insertions(+), 335 deletions(-) create mode 100644 finetune_example/README.md create mode 100644 hyperbolic_agentkit_core/actions/remote_finetune_example.py delete mode 100644 hyperbolic_agentkit_core/actions/run_remote_finetune.py diff --git a/.env.example b/.env.example index 8d80c67a..53c707c0 100644 --- a/.env.example +++ b/.env.example @@ -11,8 +11,10 @@ CDP_API_KEY_PRIVATE_KEY=your_cdp_api_key_private_key # Hyperbolic (Required) HYPERBOLIC_API_KEY=your_hyperbolic_api_key +# RSA key file path (Required) SSH_PRIVATE_KEY_PATH=~/.ssh/id_rsa +# LangChain (Required for Finetune tool) LANGCHAIN_TRACING_V2=true LANGCHAIN_ENDPOINT="your_langchain_endpoint" LANGCHAIN_API_KEY="your_langchain_api_key" @@ -50,7 +52,7 @@ USE_RETWEET=true USE_DEPLOY_MULTITOKEN=true USE_WEB_SEARCH=true USE_REQUEST_TOOLS=true -USE_FINETUNE_TOOLS=true +USE_FINETUNE_TOOLS=false ELEVEN_API_KEY=your_eleven_api_key \ No newline at end of file diff --git a/chatbot.py b/chatbot.py index 09084cb9..1239cfcb 100644 --- a/chatbot.py +++ b/chatbot.py @@ -72,8 +72,6 @@ from langchain_core.runnables import RunnableConfig from podcast_agent.podcast_knowledge_base import PodcastKnowledgeBase -from hyperbolic_agentkit_core.actions.run_remote_finetune import RunFinetuneAction - async def generate_llm_podcast_query(llm: ChatAnthropic = None) -> str: """ Generates a dynamic, contextually-aware query for the podcast knowledge base using an LLM. @@ -896,10 +894,6 @@ async def initialize_agent(): print_error(f"Error initializing GitHub tools: {str(e)}") print_error("GitHub tools will not be available") - # Add fine-tuning tool - if os.getenv("USE_FINETUNE_TOOLS", "false").lower() == "true": - tools.append(RunFinetuneAction()) - # Create the runnable config with increased recursion limit runnable_config = RunnableConfig(recursion_limit=200) diff --git a/finetune_example/README.md b/finetune_example/README.md new file mode 100644 index 00000000..2be27581 --- /dev/null +++ b/finetune_example/README.md @@ -0,0 +1,28 @@ +USE_FINETUNE_TOOLS=true + +LANGCHAIN keys need to be set in .env file + +SSH_PRIVATE_KEY_PATH make sure to have RSA + + +Rent a GPU from Hyperbolic + +"Run a fine tuning task using Mistral 7b using get_gpu_status first" + +the env variables you NEED to set are +``` +ANTHROPIC_API_KEY= +CDP_API_KEY_NAME= +CDP_API_KEY_PRIVATE_KEY= + +HYPERBOLIC_API_KEY= + +SSH_PRIVATE_KEY_PATH=/path/to/your/.ssh/id_rsa + +LANGCHAIN_TRACING_V2=true +LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +LANGCHAIN_API_KEY= +LANGCHAIN_PROJECT= + +USE_FINETUNE_TOOLS=true +``` \ No newline at end of file diff --git a/hyperbolic_agentkit_core/actions/__init__.py b/hyperbolic_agentkit_core/actions/__init__.py index 10012a00..99df08c3 100644 --- a/hyperbolic_agentkit_core/actions/__init__.py +++ b/hyperbolic_agentkit_core/actions/__init__.py @@ -8,6 +8,7 @@ from hyperbolic_agentkit_core.actions.terminate_compute import TerminateComputeAction from hyperbolic_agentkit_core.actions.get_current_balance import GetCurrentBalanceAction from hyperbolic_agentkit_core.actions.link_wallet_address import LinkWalletAddressAction +from hyperbolic_agentkit_core.actions.remote_finetune_example import RunFinetuneAction # WARNING: All new HyperbolicAction subclasses must be imported above, otherwise they will not be discovered # by get_all_hyperbolic_actions(). The import ensures the class is registered as a subclass of HyperbolicAction. def get_all_hyperbolic_actions() -> list[type[HyperbolicAction]]: @@ -22,5 +23,5 @@ def get_all_hyperbolic_actions() -> list[type[HyperbolicAction]]: __all__ = [ "HYPERBOLIC_ACTIONS", "HyperbolicAction", "RentComputeAction", "GetAvailableGpusAction", - "GetGpuStatusAction", "SSHAccessAction", "RemoteShellAction", "TerminateComputeAction", "GetSpendHistoryAction", "GetCurrentBalanceAction", "LinkWalletAddressAction" + "GetGpuStatusAction", "SSHAccessAction", "RemoteShellAction", "TerminateComputeAction", "GetSpendHistoryAction", "GetCurrentBalanceAction", "LinkWalletAddressAction", "RunFinetuneAction" ] diff --git a/hyperbolic_agentkit_core/actions/remote_finetune_example.py b/hyperbolic_agentkit_core/actions/remote_finetune_example.py new file mode 100644 index 00000000..b0f1f5ca --- /dev/null +++ b/hyperbolic_agentkit_core/actions/remote_finetune_example.py @@ -0,0 +1,208 @@ +import requests +import json +import os +import subprocess +from typing import Optional, Dict, Any +from collections.abc import Callable +from pydantic import BaseModel, Field +from langchain.tools import BaseTool +from langchain.callbacks.manager import CallbackManagerForToolRun + +from hyperbolic_agentkit_core.actions.hyperbolic_action import HyperbolicAction + +from hyperbolic_agentkit_core.actions.ssh_manager import ssh_manager +from hyperbolic_agentkit_core.actions.get_gpu_status import get_gpu_status + + +class RunFinetuneInput(BaseModel): + """Input argument schema for fine-tuning action.""" + model_name: str = Field( + ..., + description="The name of the base model to fine-tune" + ) + +def run_finetune(model_name: str) -> str: + """Run the example finetune action workflow. + + Args: + model_name (str): The name of the base model to fine-tune + + Returns: + str: JSON string containing the status and results of the fine-tuning process + """ + try: + # Step 0: Get GPU status + gpu_status = get_gpu_status() + if not gpu_status.get("instances"): + return json.dumps({"status": "error", "message": "No active GPU instances found"}) + + instance = gpu_status["instances"][0] + ssh_command = instance["sshCommand"] + + # Parse SSH command + parts = ssh_command.split() + host = parts[1].split('@')[1] + port = int(parts[3]) + username = parts[1].split('@')[0] + + # Step 1: Establish SSH connection first + ssh_result = ssh_manager.connect(host=host, port=port, username=username) + if isinstance(ssh_result, str) and "Error" in ssh_result: + return json.dumps({"status": "error", "message": f"Failed to connect to remote GPU: {ssh_result}"}) + + # Step 2: Setup remote environment + shell_result = ssh_manager.execute( + "sudo apt-get update && " + "sudo apt-get install -y rsync python3-dev python3-pip build-essential git cmake pkg-config nano" + ) + if isinstance(shell_result, str) and ("E: Unable to locate package" in shell_result or + "E: Failed to fetch" in shell_result or + "E: Could not install" in shell_result): + return json.dumps({"status": "error", "message": f"Failed to install required packages: {shell_result}"}) + + # Step 3: Sync files to remote GPU + sync_result = sync_to_remote() + if isinstance(sync_result, dict) and not sync_result.get("success", False): + return json.dumps({"status": "error", "message": sync_result.get("error", "Sync failed")}) + + # Step 4: Execute setup, training, and test inference in a single shell session + combined_command = ( + "cd finetune_example && " + "bash -c '" + "python3 -m venv venv && " + "source venv/bin/activate && " + "pip install -r requirements.txt && " + f"FINE_TUNE_MODEL={model_name} python3 finetune.py && " + "python3 test_inference.py 'How old is Tiger Woods?'" + "'" + ) + + shell_result = ssh_manager.execute(combined_command) + + # Check for the finetuned_model directory + verify_result = ssh_manager.execute("test -d /home/ubuntu/finetune_example/finetuned_model && echo 'exists'") + if "exists" not in str(verify_result): + return json.dumps({ + "status": "error", + "message": f"Fine-tuning failed or directory not created. Output: {shell_result}" + }) + + # Get the inference output + inference_output = ssh_manager.execute("cat /home/ubuntu/finetune_example/inference_output.json") + try: + inference_result = json.loads(inference_output) + except: + inference_result = {"error": "Failed to parse inference output"} + + return json.dumps({ + "status": "success", + "message": "Fine-tuning completed successfully", + "model_name": model_name, + "test_inference_output": inference_result + }) + + except Exception as e: + return json.dumps({ + "status": "error", + "message": str(e) + }) + +class RunFinetuneAction(HyperbolicAction): + """Run the example finetune action workflow.""" + + name: str = "run_finetune" + description: str = """This tool will execute fine-tuning of an AI model on Hyperbolic's GPU infrastructure. + It takes the model name as input (e.g., "unsloth/mistral-7b-v0.3-bnb-4bit"). + The model will be fine-tuned using unsloth and set up for local inference using vLLM. + Training data should be prepared in advance in the data/training_data.jsonl file.""" + args_schema: type[BaseModel] = RunFinetuneInput + return_direct: bool = False + func: Callable[..., str] = run_finetune + + + +def sync_to_remote() -> Dict[str, bool]: + """Syncs local files to remote GPU for fine-tuning.""" + try: + # Get GPU status and SSH details + gpu_status = get_gpu_status() + if not gpu_status.get("instances"): + return {"success": False, "error": "No active GPU instances found"} + + instance = gpu_status["instances"][0] + ssh_command = instance["sshCommand"] + + # Parse SSH command with error handling + try: + parts = ssh_command.split() + host = parts[1].split('@')[1] + port = parts[3] + username = parts[1].split('@')[0] + except (IndexError, AttributeError) as e: + return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"} + + # Required files to sync + local_files = [ + "./finetune_example/training_data.jsonl", + "./finetune_example/requirements.txt", + "./finetune_example/finetune.py", + "./finetune_example/test_inference.py" + ] + + + # Check if all required files exist + for local_file in local_files: + if not os.path.exists(local_file): + return { + "success": False, + "error": f"Required file not found: {local_file}" + } + + # Create base directory on remote + result = ssh_manager.execute("mkdir -p ~/finetune_example") + if isinstance(result, str) and "error" in result.lower(): + return { + "success": False, + "error": f"Failed to create base directory: {result}" + } + + + # Sync each file with error handling + for local_file in local_files: + remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" + + # Run rsync with output capture + result = subprocess.run([ + "rsync", "-avz", + "-e", f"ssh -p {port}", + local_file, + f"{username}@{host}:{remote_path}" + ], capture_output=True, text=True) + + if result.returncode != 0: + return { + "success": False, + "error": f"Failed to sync {local_file}: {result.stderr}" + } + + # Verify files were synced correctly + for local_file in local_files: + remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" + check_result = ssh_manager.execute(f"test -f {remote_path} && echo 'exists'") + + if "exists" not in str(check_result): + return { + "success": False, + "error": f"Failed to verify file sync for: {remote_path}" + } + + return { + "success": True, + "message": "All files synced successfully" + } + + except Exception as e: + return { + "success": False, + "error": f"Unexpected error during file sync: {str(e)}" + } \ No newline at end of file diff --git a/hyperbolic_agentkit_core/actions/run_remote_finetune.py b/hyperbolic_agentkit_core/actions/run_remote_finetune.py deleted file mode 100644 index a1608727..00000000 --- a/hyperbolic_agentkit_core/actions/run_remote_finetune.py +++ /dev/null @@ -1,327 +0,0 @@ -import requests -import json -import os -import subprocess -from typing import Optional, Dict, Any -from collections.abc import Callable -from pydantic import BaseModel, Field -from langchain.tools import BaseTool -from langchain.callbacks.manager import CallbackManagerForToolRun - -from hyperbolic_agentkit_core.actions.hyperbolic_action import HyperbolicAction - -from hyperbolic_agentkit_core.actions.ssh_manager import ssh_manager -from hyperbolic_agentkit_core.actions.get_gpu_status import get_gpu_status - - -class RunFinetuneInput(BaseModel): - """Input argument schema for fine-tuning action.""" - model_name: str = Field( - ..., - description="The name of the base model to fine-tune" - ) - - -class RunFinetuneAction(BaseTool): - name: str = "run_finetune" - description: str = """This tool will execute fine-tuning of an AI model on Hyperbolic's GPU infrastructure. - It takes the model name as input (e.g., "unsloth/mistral-7b-v0.3-bnb-4bit"). - The model will be fine-tuned using unsloth and set up for local inference using vLLM. - Training data should be prepared in advance in the data/training_data.jsonl file.""" - args_schema: type[BaseModel] = RunFinetuneInput - return_direct: bool = False - - def _run( - self, - model_name: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - try: - # Step 0: Get GPU status - gpu_status = get_gpu_status() - if not gpu_status.get("instances"): - return json.dumps({"status": "error", "message": "No active GPU instances found"}) - - instance = gpu_status["instances"][0] - ssh_command = instance["sshCommand"] - - # Parse SSH command - parts = ssh_command.split() - host = parts[1].split('@')[1] - port = int(parts[3]) - username = parts[1].split('@')[0] - - # Step 1: Establish SSH connection first - ssh_result = ssh_manager.connect(host=host, port=port, username=username) - if isinstance(ssh_result, str) and "Error" in ssh_result: - return json.dumps({"status": "error", "message": f"Failed to connect to remote GPU: {ssh_result}"}) - - # Step 2: Setup remote environment - # Install required system packages - shell_result = ssh_manager.execute( - "sudo apt-get update && " - "sudo apt-get install -y rsync python3-dev python3-pip build-essential git cmake pkg-config nano" - ) - # Only check for critical errors, ignore apt-utils warning - if isinstance(shell_result, str) and ("E: Unable to locate package" in shell_result or - "E: Failed to fetch" in shell_result or - "E: Could not install" in shell_result): - return json.dumps({"status": "error", "message": f"Failed to install required packages: {shell_result}"}) - - - # Step 3: Sync files to remote GPU - sync_result = sync_to_remote() - if isinstance(sync_result, dict) and not sync_result.get("success", False): - return json.dumps({"status": "error", "message": sync_result.get("error", "Sync failed")}) - - # Step 4: Execute setup, training, and test inference in a single shell session - combined_command = ( - "cd finetune_example && " - "bash -c '" - "python3 -m venv venv && " - "source venv/bin/activate && " - "pip install -r requirements.txt && " - f"FINE_TUNE_MODEL={model_name} python3 finetune.py && " - "python3 test_inference.py 'How old is Tiger Woods?'" - "'" - ) - - shell_result = ssh_manager.execute(combined_command) - - # Check for the finetuned_model directory - verify_result = ssh_manager.execute("test -d /home/ubuntu/finetune_example/finetuned_model && echo 'exists'") - if "exists" not in str(verify_result): - return json.dumps({ - "status": "error", - "message": f"Fine-tuning failed or directory not created. Output: {shell_result}" - }) - - # Get the inference output - inference_output = ssh_manager.execute("cat /home/ubuntu/finetune_example/inference_output.json") - try: - inference_result = json.loads(inference_output) - except: - inference_result = {"error": "Failed to parse inference output"} - - return json.dumps({ - "status": "success", - "message": "Fine-tuning completed successfully", - "model_name": model_name, - "test_inference_output": inference_result - }) - - except Exception as e: - return json.dumps({ - "status": "error", - "message": str(e) - }) - - async def _arun( - self, - model_name: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Async implementation of the tool.""" - return self._run(model_name) - -def parse_metrics(output: str) -> Dict[str, Any]: - """Parse training metrics from command output.""" - metrics = {} - try: - # Look for lines containing metrics in the format "metric: value" - for line in output.split('\n'): - if ':' in line: - key, value = line.split(':', 1) - key = key.strip() - value = value.strip() - try: - # Try to convert to float if possible - value = float(value) - except: - pass - metrics[key] = value - except Exception as e: - metrics["parse_error"] = str(e) - return metrics - - - -def setup_vllm_inference(model_name: str) -> Dict: - """Sets up vLLM for inference with the fine-tuned model.""" - try: - ssh_string = os.getenv("HYPERBOLIC_SSH_STRING") - if not ssh_string: - raise ValueError("HYPERBOLIC_SSH_STRING not found in environment") - - # Install vLLM and setup inference - setup_cmd = [ - "ssh", ssh_string, - f"cd /home/ubuntu/finetune_example && " - f"source venv/bin/activate && " - f"pip install vllm && " - f"python3 -c '" - f"from vllm import LLM; " - f"llm = LLM(model=\"/home/ubuntu/finetune_example/finetuned_model\"); " - f"print(\"vLLM setup complete\")'" - ] - - result = subprocess.run(setup_cmd, capture_output=True, text=True, check=True) - - return { - "success": True, - "status": "vLLM inference setup complete" - } - - except Exception as e: - return {"success": False, "error": str(e)} - -def sync_from_remote() -> Dict[str, bool]: - """Syncs fine-tuned model from remote GPU.""" - try: - # Get GPU status and SSH details - gpu_status = get_gpu_status() - if not gpu_status.get("instances"): - return {"success": False, "error": "No active GPU instances found"} - - instance = gpu_status["instances"][0] - ssh_command = instance["sshCommand"] - - # Parse SSH command with error handling - try: - parts = ssh_command.split() - host = parts[1].split('@')[1] - port = parts[3] - username = parts[1].split('@')[0] - except (IndexError, AttributeError) as e: - return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"} - - # Create local directory if it doesn't exist - local_dir = "../../finetune_example/finetuned_model" # This is right - os.makedirs(local_dir, exist_ok=True) - - # Verify remote directory exists - remote_dir = "/home/ubuntu/finetune_example/finetuned_model/" # TODO: this will be wrong - check_result = ssh_manager.execute(f"test -d {remote_dir} && echo 'exists'") - if "exists" not in str(check_result): - return { - "success": False, - "error": f"Remote directory not found: {remote_dir}" - } - - # Sync fine-tuned model back with error handling - result = subprocess.run([ - "rsync", "-avz", - "-e", f"ssh -p {port}", - f"{username}@{host}:{remote_dir}", - local_dir - ], capture_output=True, text=True) - - if result.returncode != 0: - return { - "success": False, - "error": f"Failed to sync from remote: {result.stderr}" - } - - # Verify files were synced correctly - if not os.path.exists(local_dir) or not os.listdir(local_dir): - return { - "success": False, - "error": "Sync completed but no files found in local directory" - } - - return { - "success": True, - "message": "Model files synced successfully from remote" - } - - except Exception as e: - return { - "success": False, - "error": f"Unexpected error during file sync: {str(e)}" - } - -def sync_to_remote() -> Dict[str, bool]: - """Syncs local files to remote GPU for fine-tuning.""" - try: - # Get GPU status and SSH details - gpu_status = get_gpu_status() - if not gpu_status.get("instances"): - return {"success": False, "error": "No active GPU instances found"} - - instance = gpu_status["instances"][0] - ssh_command = instance["sshCommand"] - - # Parse SSH command with error handling - try: - parts = ssh_command.split() - host = parts[1].split('@')[1] - port = parts[3] - username = parts[1].split('@')[0] - except (IndexError, AttributeError) as e: - return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"} - - # Required files to sync - local_files = [ - "./finetune_example/training_data.jsonl", - "./finetune_example/requirements.txt", - "./finetune_example/finetune.py", - "./finetune_example/test_inference.py" - ] - - - # Check if all required files exist - for local_file in local_files: - if not os.path.exists(local_file): - return { - "success": False, - "error": f"Required file not found: {local_file}" - } - - # Create base directory on remote - result = ssh_manager.execute("mkdir -p ~/finetune_example") - if isinstance(result, str) and "error" in result.lower(): - return { - "success": False, - "error": f"Failed to create base directory: {result}" - } - - - # Sync each file with error handling - for local_file in local_files: - remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" - - # Run rsync with output capture - result = subprocess.run([ - "rsync", "-avz", - "-e", f"ssh -p {port}", - local_file, - f"{username}@{host}:{remote_path}" - ], capture_output=True, text=True) - - if result.returncode != 0: - return { - "success": False, - "error": f"Failed to sync {local_file}: {result.stderr}" - } - - # Verify files were synced correctly - for local_file in local_files: - remote_path = f"/home/ubuntu/finetune_example/{'/'.join(local_file.split('/')[2:])}" - check_result = ssh_manager.execute(f"test -f {remote_path} && echo 'exists'") - - if "exists" not in str(check_result): - return { - "success": False, - "error": f"Failed to verify file sync for: {remote_path}" - } - - return { - "success": True, - "message": "All files synced successfully" - } - - except Exception as e: - return { - "success": False, - "error": f"Unexpected error during file sync: {str(e)}" - } \ No newline at end of file diff --git a/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py b/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py index 3d07d661..4b422534 100644 --- a/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py +++ b/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py @@ -57,6 +57,7 @@ class HyperbolicToolkit(BaseToolkit): get_current_balance ssh_access link_wallet_address + run_remote_finetune Use within an agent: .. code-block:: python From 05677d16bc9ac4b6b1ad21ca62650aed125617ef Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Sun, 23 Feb 2025 23:48:11 -0500 Subject: [PATCH 08/10] update fine tune README.md --- chatbot.py | 2 + finetune_example/README.md | 67 ++++++++++++++++--- .../actions/remote_finetune_example.py | 2 +- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/chatbot.py b/chatbot.py index 1239cfcb..49530b02 100644 --- a/chatbot.py +++ b/chatbot.py @@ -894,6 +894,8 @@ async def initialize_agent(): print_error(f"Error initializing GitHub tools: {str(e)}") print_error("GitHub tools will not be available") + + # Create the runnable config with increased recursion limit runnable_config = RunnableConfig(recursion_limit=200) diff --git a/finetune_example/README.md b/finetune_example/README.md index 2be27581..a49b8a08 100644 --- a/finetune_example/README.md +++ b/finetune_example/README.md @@ -1,15 +1,11 @@ -USE_FINETUNE_TOOLS=true - -LANGCHAIN keys need to be set in .env file - -SSH_PRIVATE_KEY_PATH make sure to have RSA - +# Quickstart +Demo video: https://www.loom.com/share/13dfa667db9f496188df284cb15c392b?sid=397e07f7-fb69-472e-8f93-29abea759ce8 -Rent a GPU from Hyperbolic +* It is recommended to rent a GPU from the Hyperbolic web app first to simplify to agent flow, but this is optional. -"Run a fine tuning task using Mistral 7b using get_gpu_status first" - -the env variables you NEED to set are +## Setup +0. Make sure all the installation steps in the main README are completed. +1. First, set up the following REQUIRED env variables: ``` ANTHROPIC_API_KEY= CDP_API_KEY_NAME= @@ -23,6 +19,55 @@ LANGCHAIN_TRACING_V2=true LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" LANGCHAIN_API_KEY= LANGCHAIN_PROJECT= +``` +Make sure to also set the following to "true" in your .env file: +``` USE_FINETUNE_TOOLS=true -``` \ No newline at end of file +``` +2. Boot up the chatbot gradio interface: +``` +poetry run python gradio_ui.py +``` + +3. Type in the following prompt in the chatbot interface: +``` +Run a fine tuning task using Mistral 7b using get_gpu_status first +``` + +4. Check your console logs and also SSH into your remote GPU instance to track the progress of the fine tuning task. +``` +ssh ubuntu@ -p XXXXX +cd finetune_example +ls +``` + +5. Once the fine tuning task is complete, you will see a "success" message in the chatbot interface. + +6. You can now use the fine tuned model for inference in your remote GPU instance by running: +``` +source venv/bin/activate +python3 test_inference.py "Your prompt here" +``` + +7. You can also edit the finetune.py script or training_data.jsonl file by running: +``` +nano finetune.py +nano training_data.jsonl +``` + +8. You can reinitiate another fine tuning task using your newly updated parameters (or another base model) by running: +``` +export FINE_TUNE_MODEL="unsloth/mistral-7b-v0.3-bnb-4bit" +python3 finetune.py +``` + +or you can just ask the Hyperbolic agent through the chat interface to do it for you. + +9. You can delete the finetuned model in your remote GPU instance by running: +``` +rm -rf finetuned_model +``` + +* Support for syncing the finetuned model back to your local machine is coming soon. + diff --git a/hyperbolic_agentkit_core/actions/remote_finetune_example.py b/hyperbolic_agentkit_core/actions/remote_finetune_example.py index b0f1f5ca..8e9cf437 100644 --- a/hyperbolic_agentkit_core/actions/remote_finetune_example.py +++ b/hyperbolic_agentkit_core/actions/remote_finetune_example.py @@ -73,7 +73,7 @@ def run_finetune(model_name: str) -> str: "source venv/bin/activate && " "pip install -r requirements.txt && " f"FINE_TUNE_MODEL={model_name} python3 finetune.py && " - "python3 test_inference.py 'How old is Tiger Woods?'" + "python3 test_inference.py 'What can you tell me about LLMs?'" "'" ) From 9c397cab017ae2c4e8fb133a97e70f575df8281e Mon Sep 17 00:00:00 2001 From: Zile <75930156+ZILECAO@users.noreply.github.com> Date: Sun, 23 Feb 2025 23:48:43 -0500 Subject: [PATCH 09/10] Update README.md --- finetune_example/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finetune_example/README.md b/finetune_example/README.md index a49b8a08..11bc4405 100644 --- a/finetune_example/README.md +++ b/finetune_example/README.md @@ -1,7 +1,7 @@ # Quickstart Demo video: https://www.loom.com/share/13dfa667db9f496188df284cb15c392b?sid=397e07f7-fb69-472e-8f93-29abea759ce8 -* It is recommended to rent a GPU from the Hyperbolic web app first to simplify to agent flow, but this is optional. +* It is recommended to rent a GPU from the Hyperbolic web app first to simplify the agent flow, but this is optional. ## Setup 0. Make sure all the installation steps in the main README are completed. From 06882478f0e79b8cf916d4e016b643f0e88ecd79 Mon Sep 17 00:00:00 2001 From: ZILECAO Date: Sun, 23 Feb 2025 23:50:15 -0500 Subject: [PATCH 10/10] update Fine tune example README.md --- .env.example | 2 -- finetune_example/README.md | 4 ---- 2 files changed, 6 deletions(-) diff --git a/.env.example b/.env.example index 53c707c0..133b097d 100644 --- a/.env.example +++ b/.env.example @@ -52,7 +52,5 @@ USE_RETWEET=true USE_DEPLOY_MULTITOKEN=true USE_WEB_SEARCH=true USE_REQUEST_TOOLS=true -USE_FINETUNE_TOOLS=false - ELEVEN_API_KEY=your_eleven_api_key \ No newline at end of file diff --git a/finetune_example/README.md b/finetune_example/README.md index 11bc4405..26bc967f 100644 --- a/finetune_example/README.md +++ b/finetune_example/README.md @@ -21,10 +21,6 @@ LANGCHAIN_API_KEY= LANGCHAIN_PROJECT= ``` -Make sure to also set the following to "true" in your .env file: -``` -USE_FINETUNE_TOOLS=true -``` 2. Boot up the chatbot gradio interface: ``` poetry run python gradio_ui.py