diff --git a/.env.example b/.env.example index cd558170..133b097d 100644 --- a/.env.example +++ b/.env.example @@ -11,8 +11,10 @@ CDP_API_KEY_PRIVATE_KEY=your_cdp_api_key_private_key # Hyperbolic (Required) HYPERBOLIC_API_KEY=your_hyperbolic_api_key +# RSA key file path (Required) SSH_PRIVATE_KEY_PATH=~/.ssh/id_rsa +# LangChain (Required for Finetune tool) LANGCHAIN_TRACING_V2=true LANGCHAIN_ENDPOINT="your_langchain_endpoint" LANGCHAIN_API_KEY="your_langchain_api_key" @@ -51,5 +53,4 @@ USE_DEPLOY_MULTITOKEN=true USE_WEB_SEARCH=true USE_REQUEST_TOOLS=true - ELEVEN_API_KEY=your_eleven_api_key \ No newline at end of file diff --git a/.gitignore b/.gitignore index c75cdefd..41626e2b 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ id_rsa.pub # Project specific wallet_data.txt +twitter_state_default.db .replit .python-version @@ -69,3 +70,7 @@ videofiles/ jsonoutputs/ +# Finetuning +finetuned_model +remote_file_sync.py +finetune_example/training_data2.jsonl \ No newline at end of file diff --git a/chatbot.py b/chatbot.py index 215e93d6..49530b02 100644 --- a/chatbot.py +++ b/chatbot.py @@ -894,7 +894,7 @@ async def initialize_agent(): print_error(f"Error initializing GitHub tools: {str(e)}") print_error("GitHub tools will not be available") - + # Create the runnable config with increased recursion limit runnable_config = RunnableConfig(recursion_limit=200) diff --git a/finetune_example/README.md b/finetune_example/README.md new file mode 100644 index 00000000..26bc967f --- /dev/null +++ b/finetune_example/README.md @@ -0,0 +1,69 @@ +# Quickstart +Demo video: https://www.loom.com/share/13dfa667db9f496188df284cb15c392b?sid=397e07f7-fb69-472e-8f93-29abea759ce8 + +* It is recommended to rent a GPU from the Hyperbolic web app first to simplify the agent flow, but this is optional. + +## Setup +0. Make sure all the installation steps in the main README are completed. +1. 
First, set the following REQUIRED environment variables: +``` +ANTHROPIC_API_KEY= +CDP_API_KEY_NAME= +CDP_API_KEY_PRIVATE_KEY= + +HYPERBOLIC_API_KEY= + +SSH_PRIVATE_KEY_PATH=/path/to/your/.ssh/id_rsa + +LANGCHAIN_TRACING_V2=true +LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +LANGCHAIN_API_KEY= +LANGCHAIN_PROJECT= +``` + +2. Start the chatbot Gradio interface: +``` +poetry run python gradio_ui.py +``` + +3. Type the following prompt into the chatbot interface: +``` +Run a fine tuning task using Mistral 7b using get_gpu_status first +``` + +4. Check your console logs, and SSH into your remote GPU instance to track the progress of the fine-tuning task (replace the host and port with the values shown for your rented instance): +``` +ssh ubuntu@YOUR_GPU_HOST -p XXXXX +cd finetune_example +ls +``` + +5. Once the fine-tuning task is complete, you will see a "success" message in the chatbot interface. + +6. You can now use the fine-tuned model for inference on your remote GPU instance by running: +``` +source venv/bin/activate +python3 test_inference.py "Your prompt here" +``` + +7. You can also edit the finetune.py script or the training_data.jsonl file on the instance, for example with: +``` +nano finetune.py +nano training_data.jsonl +``` + +8. You can start another fine-tuning task with your updated parameters (or another base model) by running: +``` +export FINE_TUNE_MODEL="unsloth/mistral-7b-v0.3-bnb-4bit" +python3 finetune.py +``` + +or you can just ask the Hyperbolic agent through the chat interface to do it for you. + +9. You can delete the fine-tuned model on your remote GPU instance by running: +``` +rm -rf finetuned_model +``` + +* Support for syncing the fine-tuned model back to your local machine is coming soon. 
"""Fine-tune a 4-bit Mistral-style base model with Unsloth LoRA adapters.

The script reads ``training_data.jsonl`` from the current directory, trains
LoRA adapters with TRL's ``SFTTrainer`` and writes the result to
``./finetuned_model``.

The base model can be overridden with the ``FINE_TUNE_MODEL`` environment
variable, as described in the example README; when it is unset or blank the
default ``unsloth/mistral-7b-v0.3-bnb-4bit`` is used.
"""
import os
from datetime import datetime

# Base model used when FINE_TUNE_MODEL is unset or blank.
DEFAULT_MODEL_NAME = "unsloth/mistral-7b-v0.3-bnb-4bit"
# Single source of truth for the checkpoint / final-adapter directory.
OUTPUT_DIR = "./finetuned_model"
# Chat-format training examples, one JSON object per line.
TRAINING_DATA_PATH = "training_data.jsonl"
# Upper bound on tokens per training example.
MAX_SEQ_LENGTH = 4096


def log_progress(msg: str) -> None:
    """Print *msg* prefixed with the current local time (HH:MM:SS)."""
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] 🐰 {msg}")


def resolve_model_name() -> str:
    """Return the base model to fine-tune.

    Reads the ``FINE_TUNE_MODEL`` environment variable and falls back to
    ``DEFAULT_MODEL_NAME`` when the variable is missing, empty or only
    whitespace.
    """
    return os.environ.get("FINE_TUNE_MODEL", "").strip() or DEFAULT_MODEL_NAME


def format_chat(example):
    """Format the chat messages into a single text string.

    Args:
        example: Mapping with a ``"messages"`` list; each message is a mapping
            with ``"role"`` and ``"content"`` keys.

    Returns:
        The same ``example`` object, with ``example["text"]`` set to the
        space-joined rendering of its messages.

    System messages are moved to the front and wrapped in ``[INST] ... [/INST]``
    like user messages; assistant messages are emitted verbatim; messages with
    any other role are ignored.
    """
    formatted_messages = []
    for msg in example["messages"]:
        role = msg["role"]
        if role == "system":
            # System message goes at the start, regardless of where it appears.
            formatted_messages.insert(0, f"[INST] {msg['content']} [/INST]")
        elif role == "user":
            formatted_messages.append(f"[INST] {msg['content']} [/INST]")
        elif role == "assistant":
            formatted_messages.append(f"{msg['content']}")

    example["text"] = " ".join(formatted_messages)
    return example


def fine_tune():
    """Run the full fine-tuning pipeline and return the trainer statistics.

    Steps: load the JSONL dataset, load the base model in 4-bit, attach LoRA
    adapters, train with ``SFTTrainer``, then save the adapters and tokenizer
    to ``OUTPUT_DIR``. A GGUF (q8_0) export is attempted afterwards on a
    best-effort basis; its failure is reported but does not abort the run.

    Returns:
        Whatever ``SFTTrainer.train()`` returns.
    """
    # The GPU stack is imported here rather than at module level so that the
    # pure helpers above stay importable on machines without CUDA/unsloth.
    # The original import order is kept (unsloth before transformers/trl).
    import torch  # noqa: F401  (kept from the original module header)
    from unsloth import FastLanguageModel, is_bfloat16_supported
    from transformers import TrainingArguments
    from trl import SFTTrainer
    from datasets import load_dataset

    log_progress("🚀 Starting fine-tuning process...")

    # Load dataset. The rows are passed to the trainer as-is; format_chat is
    # not applied here, matching the original script.
    dataset = load_dataset("json", data_files={"train": TRAINING_DATA_PATH})["train"]

    model_name = resolve_model_name()
    log_progress(f"Base model: {model_name}")

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=MAX_SEQ_LENGTH,
        dtype=None,  # Auto-detect optimal dtype
        load_in_4bit=True,
    )

    # Set the chat template for Mistral.
    # NOTE(review): the assistant branch appends an empty string; that looks
    # like a placeholder for an end-of-sequence marker - confirm upstream.
    tokenizer.chat_template = """{% for message in messages %}
        {% if message['role'] == 'user' %}
            {{ '[INST] ' + message['content'] + ' [/INST]' }}
        {% elif message['role'] == 'assistant' %}
            {{ message['content'] + '' }}
        {% elif message['role'] == 'system' %}
            {{ '[INST] ' + message['content'] + ' [/INST]' }}
        {% endif %}
    {% endfor %}"""

    # Add LoRA adapters with optimized settings
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,  # Optimized setting
        bias="none",  # Optimized setting
        use_gradient_checkpointing="unsloth",  # Uses 30% less VRAM
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )

    # Exactly one of fp16/bf16 is enabled, chosen from hardware support.
    use_bf16 = is_bfloat16_supported()
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=3,
        per_device_train_batch_size=1,  # Reduced for longer sequences
        gradient_accumulation_steps=4,
        warmup_steps=5,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        fp16=not use_bf16,
        bf16=use_bf16,
        torch_compile=False,  # Disable torch compile for stability
        seed=3407,
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        tokenizer=tokenizer,
        max_seq_length=MAX_SEQ_LENGTH,
        dataset_num_proc=2,
        packing=False,
    )

    log_progress("🏃 Training model...")
    trainer_stats = trainer.train()

    log_progress("💾 Saving fine-tuned model...")
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)

    # Optionally save in 8-bit GGUF format. This is deliberately best-effort:
    # a failed export must not discard a successful training run.
    try:
        model.save_pretrained_gguf(OUTPUT_DIR, tokenizer, quantization_method=["q8_0"])
    except Exception as e:
        print(f"Note: GGUF export failed (this is optional): {e}")

    return trainer_stats


if __name__ == "__main__":
    fine_tune()
"""Run one prompt through the fine-tuned model stored in ./finetuned_model.

Usage:
    python test_inference.py "your prompt here"

The result is printed and also written to ``inference_output.json`` in the
current directory.
"""
import json
import os
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory the fine-tuning script saves the model and tokenizer to.
MODEL_DIR = "./finetuned_model"
# File the prompt/response pair is written to.
OUTPUT_PATH = "inference_output.json"


def run_inference(prompt):
    """Generate a completion for *prompt* with the fine-tuned model.

    Args:
        prompt: Raw user prompt; it is wrapped in ``[INST] ... [/INST]``
            before tokenization.

    Returns:
        A dict ``{"prompt": <prompt>, "response": <generated text>}``. The
        response contains only the newly generated tokens, not an echo of the
        prompt.

    Raises:
        RuntimeError: If CUDA is not available.
        FileNotFoundError: If the fine-tuned model directory does not exist.
        Exception: Any error raised while loading the model or generating is
            printed and re-raised unchanged.
    """
    # Ensure CUDA is available and initialized
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available")

    # Force CUDA initialization
    torch.cuda.init()

    # Print CUDA info for debugging
    print(f"CUDA Device: {torch.cuda.get_device_name()}")
    print(f"CUDA Version: {torch.version.cuda}")

    # Always use the finetuned model directory for inference
    model_dir = os.path.abspath(MODEL_DIR)
    if not os.path.isdir(model_dir):
        # Fail early with an actionable message instead of an opaque loader
        # error about an invalid model identifier.
        raise FileNotFoundError(
            f"Fine-tuned model directory not found: {model_dir}. "
            "Run finetune.py first."
        )

    try:
        # Load the fine-tuned model and its tokenizer
        model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        # Prepare input
        test_prompt = f"[INST] {prompt} [/INST]"
        inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
            )

        # generate() returns prompt tokens followed by the continuation for
        # decoder-only models; drop the prompt so "response" is the answer only.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        output = {
            "prompt": prompt,
            "response": generated_text,
        }

        # Save and print results
        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)

        print("\n=== Test Inference Results ===")
        print(f"Prompt: {output['prompt']}")
        print(f"Response: {output['response']}")
        print("============================\n")

        return output

    except Exception as e:
        # Surface the failure on the console, then let the caller see it.
        print(f"Error during inference: {str(e)}")
        raise


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python test_inference.py \"your prompt here\"")
        sys.exit(1)

    run_inference(sys.argv[1])
\n\tclosure_authorised_by_staff_id INTEGER NOT NULL, \n\treported_by_staff_id INTEGER NOT NULL, \n\tdate_problem_reported DATETIME NOT NULL, \n\tdate_problem_closed DATETIME, \n\tproblem_description VARCHAR(255), \n\tother_problem_details VARCHAR(255), \n\tPRIMARY KEY (problem_id), \n\tFOREIGN KEY(reported_by_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(product_id) REFERENCES \"Product\" (product_id), \n\tFOREIGN KEY(closure_authorised_by_staff_id) REFERENCES \"Staff\" (staff_id)\n)\n/*\nColumns in Problems and 3 examples in each column for high cardinality columns :\nproblem_id : 1, 13, 11\nclosure_authorised_by_staff_id : 1, 13, 2\ndate_problem_reported : 1995-05-14 08:32:56, 1988-11-07 16:09:31, 1986-11-13 07:30:55\ndate_problem_closed : 1974-09-20 13:42:19, 1997-10-18 20:09:57, 2004-06-20 01:08:25\nproblem_description : d, i, s\n*/\n/*\nColumns in Problems and all categories for low cardinality columns :\nproduct_id : 1, 13, 2, 5, 7, 8, 4, 6, 15\nreported_by_staff_id : 1, 13, 11, 2, 5, 7, 4, 14, 10\nother_problem_details : f, m, i, s, k, l, p, v, c\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE category (\n\tcategory_id INTEGER NOT NULL, \n\tname VARCHAR(25) NOT NULL, \n\tlast_update TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, \n\tPRIMARY KEY (category_id)\n)\n/*\nColumns in category and 3 examples in each column for high cardinality columns :\ncategory_id : 1, 16, 13\nname : Family, Sci-Fi, Action\n*/\n/*\nColumns in category and all categories for low cardinality columns :\nlast_update : 2006-02-15 
04:46:27\n*/\n \nCREATE TABLE ship (\n\t\"Ship_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Type\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Tonnage\" INTEGER, \n\tPRIMARY KEY (\"Ship_ID\")\n)\n/*\nColumns in ship and all categories for low cardinality columns :\nShip_ID : 1, 2, 5, 7, 8, 4, 6, 3\nName : Clan McTavish, Farringford, Appam, Author, Dromonby, Corbridge, Trader, Ariadne\nType : Battle ship, Cargo ship\nNationality : United States, United Kingdom\nTonnage : 3035, 3146, 7781, 3496, 3687, 5816, 3627, 3608\n*/\n \nCREATE TABLE member_attendance (\n\t\"Member_ID\" INTEGER, \n\t\"Performance_ID\" INTEGER, \n\t\"Num_of_Pieces\" INTEGER, \n\tPRIMARY KEY (\"Member_ID\", \"Performance_ID\"), \n\tFOREIGN KEY(\"Performance_ID\") REFERENCES performance (\"Performance_ID\"), \n\tFOREIGN KEY(\"Member_ID\") REFERENCES member (\"Member_ID\")\n)\n/*\nColumns in member_attendance and all categories for low cardinality columns :\nMember_ID : 1, 11, 2, 5, 7, 4, 3\nPerformance_ID : 1, 2, 4, 6, 3\nNum_of_Pieces : 1, 2, 4, 3\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE chip_model (\n\t\"Model_name\" TEXT, \n\t\"Launch_year\" REAL, \n\t\"RAM_MiB\" REAL, \n\t\"ROM_MiB\" REAL, \n\t\"Slots\" TEXT, \n\t\"WiFi\" TEXT, \n\t\"Bluetooth\" TEXT, \n\tPRIMARY KEY (\"Model_name\")\n)\n/*\nColumns in chip_model and 3 examples in each column for high cardinality columns :\nModel_name : X30 mid-range, X50 Advanced, X51 mid-range\n*/\n/*\nColumns in chip_model and all categories for low cardinality 
columns :\nLaunch_year : 2002.0, 2005.0, 2004.0, 2003.0\nRAM_MiB : 32.0, 64.0\nROM_MiB : 48.0, 256.0, 128.0, 32.0, 64.0\nSlots : 1CFII,1SD, 1SD\nWiFi : 802.11b, No\nBluetooth : 1.2, Yes, No, 1.1\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE mountain (\n\t\"Mountain_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Height\" REAL, \n\t\"Prominence\" REAL, \n\t\"Range\" TEXT, \n\t\"Country\" TEXT, \n\tPRIMARY KEY (\"Mountain_ID\")\n)\n/*\nColumns in mountain and all categories for low cardinality columns :\nMountain_ID : 1, 2, 5, 7, 4, 6, 3\nName : Ngaliema / Mt Stanley (Margherita Pk), Mount Kenya (Lenana), Kibo (Uhuru Pk), Ngaliema / Mt Stanley (Savoia Pk), Mount Kenya (Batian), Duwoni / Mt Speke (Vittorio Emanuele Pk), Mawenzi (Hans Meyer Pk)\nHeight : 5109.0, 5199.0, 5895.0, 4890.0, 4985.0, 4977.0, 5148.0\nProminence : 720.0, 850.0, 3951.0, 3825.0, 130.0, 5885.0, 110.0\nRange : Kilimanjaro, Mount Kenya, Rwenzori\nCountry : DR Congo Uganda, Uganda, Tanzania, Kenya\n*/\n \nCREATE TABLE \"Restaurant_Type\" (\n\t\"ResTypeID\" INTEGER, \n\t\"ResTypeName\" VARCHAR(40), \n\t\"ResTypeDescription\" VARCHAR(100), \n\tPRIMARY KEY (\"ResTypeID\")\n)\n/*\nColumns in Restaurant_Type and all categories for low cardinality columns :\nResTypeID : 1, 2\nResTypeName : Sandwich, Stir-fry\nResTypeDescription : Classic Chinese cooking., Simplest there is.\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" 
TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE \"Country\" (\n\tid INTEGER, \n\tname TEXT, \n\tPRIMARY KEY (id)\n)\n/*\nColumns in Country and 3 examples in each column for high cardinality columns :\nid : 1, 19694, 7809\nname : Scotland, Italy, Spain\n*/\n\n \nCREATE TABLE artist (\n\tartist_name TEXT(50) NOT NULL, \n\tcountry TEXT(20), \n\tgender TEXT(20), \n\tpreferred_genre TEXT(50), \n\tCONSTRAINT a_name PRIMARY KEY (artist_name), \n\tFOREIGN KEY(preferred_genre) REFERENCES genre (g_name) ON DELETE CASCADE\n)\n/*\nColumns in artist and all categories for low cardinality columns :\nartist_name : Prity, Michel, Topu, Shrikanta, Enrique, Farida\ncountry : India, UK, USA, Bangladesh\ngender : Male, Female\npreferred_genre : tagore, folk, modern, nazrul, blues, pop\n*/\n \nCREATE TABLE \"Organizations\" (\n\torganization_id INTEGER NOT NULL, \n\tparent_organization_id INTEGER, \n\torganization_details VARCHAR(255), \n\tPRIMARY KEY (organization_id)\n)\n/*\nColumns in Organizations and all categories for low cardinality columns :\norganization_id : 7, 8, 10\nparent_organization_id : 7, 8\norganization_details : Denesik and Sons Party, Reinger, Hudson and Nolan Group, Robel-Schulist Group\n*/\n \nCREATE TABLE school (\n\t\"School_ID\" INTEGER, \n\t\"School\" TEXT, \n\t\"Location\" TEXT, \n\t\"Enrollment\" REAL, \n\t\"Founded\" REAL, \n\t\"Denomination\" TEXT, \n\t\"Boys_or_Girls\" TEXT, \n\t\"Day_or_Boarding\" TEXT, 
\n\t\"Year_Entered_Competition\" REAL, \n\t\"School_Colors\" TEXT, \n\tPRIMARY KEY (\"School_ID\")\n)\n/*\nColumns in school and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nSchool : St Aloysius' College, Cranbrook School, Waverley College, Knox Grammar School, Barker College, Trinity Grammar School\nLocation : Hornsby, Summer Hill, Waverley, Bellevue Hill, Milsons Point, Wahroonga\nEnrollment : 1000.0, 1850.0, 2200.0, 1200.0, 2300.0, 1430.0\nFounded : 1918.0, 1924.0, 1913.0, 1879.0, 1903.0, 1890.0\nDenomination : Catholic, Uniting Church, Anglican\nBoys_or_Girls : Boys only to Yr 9 Co-ed Year 10 to 12, Boys\nDay_or_Boarding : Day, Day & Boarding\nYear_Entered_Competition : 1944.0, 1929.0\nSchool_Colors : Royal Blue and Gold, Black & Blue, Red, White & Blue, Red & Blue, Green and White\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE 
TABLE journalist (\n\t\"journalist_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Age\" TEXT, \n\t\"Years_working\" INTEGER, \n\tPRIMARY KEY (\"journalist_ID\")\n)\n/*\nColumns in journalist and 3 examples in each column for high cardinality columns :\njournalist_ID : 1, 11, 2\nName : Jack Meaney, Herbert Swindells, Jackie Waring\n*/\n/*\nColumns in journalist and all categories for low cardinality columns :\nNationality : Northern Ireland, Wales, England\nAge : 37, 28, 25, 33, 34, 43, 27, 29\nYears_working : 1, 5, 7, 8, 21, 6, 3, 12, 10, 9\n*/\n \nCREATE TABLE list (\n\t\"LastName\" TEXT, \n\t\"FirstName\" TEXT, \n\t\"Grade\" INTEGER, \n\t\"Classroom\" INTEGER, \n\tPRIMARY KEY (\"LastName\", \"FirstName\")\n)\n/*\nColumns in list and 3 examples in each column for high cardinality columns :\nLastName : HOUTCHENS, GELL, FLACHS\nFirstName : RAY, EMILE, PATRINA\nClassroom : 109, 110, 106\n*/\n/*\nColumns in list and all categories for low cardinality columns :\nGrade : 1, 2, 5, 4, 6, 3, 0\n*/\n"}, {"role": "user", "content": "How many heads of the departments are older than 56 ?"}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: age.\nThe SQL query I'll be generating is:\nSELECT count(*) FROM head WHERE age > 56"}]} +{"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns 
:\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n \nCREATE TABLE \"Documents_with_Expenses\" (\n\t\"Document_ID\" INTEGER NOT NULL, \n\t\"Budget_Type_Code\" CHAR(15) NOT NULL, \n\t\"Document_Details\" VARCHAR(255), \n\tPRIMARY KEY (\"Document_ID\"), \n\tFOREIGN KEY(\"Budget_Type_Code\") REFERENCES \"Ref_Budget_Codes\" (\"Budget_Type_Code\"), \n\tFOREIGN KEY(\"Document_ID\") REFERENCES \"Documents\" (\"Document_ID\")\n)\n/*\nColumns in Documents_with_Expenses and all categories for low cardinality columns :\nDocument_ID : 300, 240, 309, 226, 367, 371, 383, 227, 192, 57\nBudget_Type_Code : GV, SF, ORG\nDocument_Details : government, safety, organization\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. 
Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_address_id INTEGER NOT NULL, \n\tcustomer_status_code VARCHAR(15) NOT NULL, \n\tdate_became_customer DATETIME, \n\tdate_of_birth DATETIME, \n\tfirst_name VARCHAR(80), \n\tlast_name VARCHAR(80), \n\tamount_outstanding DOUBLE, \n\temail_address VARCHAR(250), \n\tphone_number VARCHAR(255), \n\tcell_mobile_phone_number VARCHAR(255), \n\tPRIMARY KEY (customer_id), \n\tFOREIGN KEY(customer_address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ndate_became_customer : 2016-03-18 06:14:18, 2016-05-13 07:38:23, 2017-10-05 14:15:46\ndate_of_birth : 1994-05-07 01:32:16, 1991-12-05 02:50:15, 1998-09-18 04:45:01\nfirst_name : Millie, Dianna, Carole\nlast_name : Schmeler, Tromp, Goodwin\namount_outstanding : 7093.0, 8144.0, 9447.0\nemail_address : mariela28@example.org, wehner.harold@example.com, afranecki@example.com\nphone_number : 1-618-535-9750, 218.550.1362, 753.921.0871\ncell_mobile_phone_number : 578-518-4785x612, (780)719-4206x033, (630)964-2426\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\ncustomer_address_id : 1, 13, 2, 7, 
8, 12, 10, 9\ncustomer_status_code : Bad Customer, Good Customer\n*/\n \nCREATE TABLE \"Instruments\" (\n\t\"SongId\" INTEGER, \n\t\"BandmateId\" INTEGER, \n\t\"Instrument\" TEXT, \n\tPRIMARY KEY (\"SongId\", \"BandmateId\", \"Instrument\"), \n\tFOREIGN KEY(\"SongId\") REFERENCES \"Songs\" (\"SongId\"), \n\tFOREIGN KEY(\"BandmateId\") REFERENCES \"Band\" (\"Id\")\n)\n/*\nColumns in Instruments and all categories for low cardinality columns :\nSongId : \nBandmateId : \nInstrument : \n*/\n \nCREATE TABLE \"Apartments\" (\n\tapt_id INTEGER NOT NULL, \n\tbuilding_id INTEGER NOT NULL, \n\tapt_type_code CHAR(15), \n\tapt_number CHAR(10), \n\tbathroom_count INTEGER, \n\tbedroom_count INTEGER, \n\troom_count CHAR(5), \n\tPRIMARY KEY (apt_id), \n\tFOREIGN KEY(building_id) REFERENCES \"Apartment_Buildings\" (building_id), \n\tUNIQUE (apt_id)\n)\n/*\nColumns in Apartments and 3 examples in each column for high cardinality columns :\napt_id : 1, 13, 11\napt_number : Suite 307, Apt. 411, Apt. 585\n*/\n/*\nColumns in Apartments and all categories for low cardinality columns :\nbuilding_id : 153, 673, 808, 744, 624, 790, 191, 734, 225\napt_type_code : Studio, Flat, Duplex\nbathroom_count : 1, 2, 3\nbedroom_count : 5, 4, 6, 3\nroom_count : 5, 7, 8, 6, 3, 9\n*/\n \nCREATE TABLE \"Customers\" (\n\tcustomer_id INTEGER, \n\tcustomer_first_name VARCHAR(20), \n\tcustomer_last_name VARCHAR(20), \n\tcustomer_address VARCHAR(255), \n\tcustomer_phone VARCHAR(255), \n\tcustomer_email VARCHAR(255), \n\tother_customer_details VARCHAR(255), \n\tPRIMARY KEY (customer_id)\n)\n/*\nColumns in Customers and 3 examples in each column for high cardinality columns :\ncustomer_id : 1, 13, 11\ncustomer_first_name : Armani, Bryce, Nat\ncustomer_last_name : Swift, Lind, Huels\ncustomer_address : 6478 Moen Isle Suite 910\nSchimmelmouth, VT 96364-4898, 47286 Mraz Park Apt. 
424\nJocelynfurt, OH 59023-2787, 094 Julianne Mill\nWebstertown, KY 91980-4004\ncustomer_phone : (415)237-0701x3115, 679-845-8645x94312, 284.749.0453\ncustomer_email : kyra.murazik@example.org, schaden.katrina@example.net, huels.antonina@example.com\n*/\n/*\nColumns in Customers and all categories for low cardinality columns :\nother_customer_details : None\n*/\n \nCREATE TABLE \"Guests\" (\n\tguest_id INTEGER NOT NULL, \n\tgender_code CHAR(1), \n\tguest_first_name VARCHAR(80), \n\tguest_last_name VARCHAR(80), \n\tdate_of_birth DATETIME, \n\tPRIMARY KEY (guest_id), \n\tUNIQUE (guest_id)\n)\n/*\nColumns in Guests and 3 examples in each column for high cardinality columns :\nguest_id : 1, 13, 11\nguest_first_name : Louvenia, Lou, Keith\nguest_last_name : Bode, Mante, Schowalter\ndate_of_birth : 2001-06-18 20:05:55, 1980-06-20 18:15:39, 1997-01-15 17:37:40\n*/\n/*\nColumns in Guests and all categories for low cardinality columns :\ngender_code : Male, Unknown, Female\n*/\n \nCREATE TABLE school_details (\n\t\"School_ID\" INTEGER, \n\t\"Nickname\" TEXT, \n\t\"Colors\" TEXT, \n\t\"League\" TEXT, \n\t\"Class\" TEXT, \n\t\"Division\" TEXT, \n\tPRIMARY KEY (\"School_ID\"), \n\tFOREIGN KEY(\"School_ID\") REFERENCES school (\"School_ID\")\n)\n/*\nColumns in school_details and all categories for low cardinality columns :\nSchool_ID : 1, 2, 5, 4, 6, 3\nNickname : Auks, Raiders, Silver Eagles, Seahawks, Buccaneers, Tigers\nColors : Dark Green and White, Blue and Yellow, Maroon and White, Blue and Gold, Garnet and White, Silver and Maroon\nLeague : DSHA\nClass : Flight B, Flight A, Fight A\nDivision : Division 1, Division 2, Division 3\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Sex\" TEXT, \n\t\"Name\" TEXT, \n\t\"Date_of_Birth\" TEXT, \n\t\"Height\" REAL, \n\t\"Weight\" REAL, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 8, 4, 6, 3, 9\nSex : F, M\nName : Vincent Montméat, 
Frantz Granvorka, Loïc De Kergret, Philippe Barça-Cysique, Guillaume Samica, Stéphane Antiga, Dominique Daquin, Laurent Capet, Hubert Henno\nDate_of_Birth : 20.08.1970, 10.03.1976, 05.05.1972, 28.09.1981, 22.04.1977, 01.09.1977, 10.11.1972, 06.10.1976, 03.02.1976\nHeight : 196.0, 195.0, 197.0, 188.0, 193.0, 200.0, 202.0, 194.0\nWeight : 89.0, 82.0, 85.0, 88.0, 90.0, 92.0, 83.0, 94.0\n*/\n \nCREATE TABLE protein (\n\tcommon_name TEXT, \n\tprotein_name TEXT, \n\tdivergence_from_human_lineage REAL, \n\taccession_number TEXT, \n\tsequence_length REAL, \n\tsequence_identity_to_human_protein TEXT, \n\t\"Institution_id\" TEXT, \n\tPRIMARY KEY (common_name), \n\tFOREIGN KEY(\"Institution_id\") REFERENCES \"Institution\" (\"Institution_id\")\n)\n/*\nColumns in protein and all categories for low cardinality columns :\ncommon_name : Tropical Clawed Frog, porkworm, body louse, southern house mosquito, purple sea urchin\nprotein_name : uncharacterized protein LOC578090, conserved hypothetical protein, Centromeric protein E, putative, surface antigen repeat family protein, uncharacterized protein C20orf117-like\ndivergence_from_human_lineage : 937.5, 742.9, 782.7, 371.2\naccession_number : XP_003380263.1, XP_002942331.1, XP_783370.2, XP_002429877.1, XP_001843754.1\nsequence_length : 2030.0, 1587.0, 2086.0, 1584.0, 1878.0\nsequence_identity_to_human_protein : 30%, 47%, 36%, 39%, 32%\nInstitution_id : 1, 5, 9, 3\n*/\n \nCREATE TABLE \"Products\" (\n\tproduct_id INTEGER, \n\tcolor_code VARCHAR(15) NOT NULL, \n\tproduct_category_code VARCHAR(15) NOT NULL, \n\tproduct_name VARCHAR(80), \n\ttypical_buying_price VARCHAR(20), \n\ttypical_selling_price VARCHAR(20), \n\tproduct_description VARCHAR(255), \n\tother_product_details VARCHAR(255), \n\tPRIMARY KEY (product_id), \n\tFOREIGN KEY(color_code) REFERENCES \"Ref_Colors\" (color_code), \n\tFOREIGN KEY(product_category_code) REFERENCES \"Ref_Product_Categories\" (product_category_code)\n)\n/*\nColumns in Products and 3 examples in each 
column for high cardinality columns :\nproduct_id : 1, 13, 11\nproduct_name : peper, cumin, chili\ntypical_buying_price : , 5.578, 503.8431967\ntypical_selling_price : , 149, 10377614.847385\nproduct_description : ut, et, vel\n*/\n/*\nColumns in Products and all categories for low cardinality columns :\ncolor_code : 1, 2, 5, 7, 4, 9\nproduct_category_code : Spices, Seeds, Herbs\nother_product_details : None\n*/\n \nCREATE TABLE film_text (\n\tfilm_id SMALLINT NOT NULL, \n\ttitle VARCHAR(255) NOT NULL, \n\tdescription TEXT, \n\tPRIMARY KEY (film_id)\n)\n/*\nColumns in film_text and all categories for low cardinality columns :\nfilm_id : \ntitle : \ndescription : \n*/\n \nCREATE TABLE \"Student\" (\n\t\"StuID\" INTEGER, \n\t\"LName\" VARCHAR(12), \n\t\"Fname\" VARCHAR(12), \n\t\"Age\" INTEGER, \n\t\"Sex\" VARCHAR(1), \n\t\"Major\" INTEGER, \n\t\"Advisor\" INTEGER, \n\tcity_code VARCHAR(3), \n\tPRIMARY KEY (\"StuID\")\n)\n/*\nColumns in Student and 3 examples in each column for high cardinality columns :\nStuID : 1003, 1017, 1002\nLName : Lee, Han, Brody\nFname : Tracy, Michael, Shiela\nAdvisor : 7723, 7712, 7134\ncity_code : ATL, HOU, LON\n*/\n/*\nColumns in Student and all categories for low cardinality columns :\nAge : 26, 16, 20, 17, 21, 19, 18, 27, 22\nSex : F, M\nMajor : 520, 100, 50, 600, 540, 550\n*/\n \nCREATE TABLE pilot (\n\t\"Pilot_ID\" INTEGER, \n\t\"Pilot_name\" TEXT, \n\t\"Rank\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Nationality\" TEXT, \n\t\"Position\" TEXT, \n\t\"Join_Year\" INTEGER, \n\t\"Team\" TEXT, \n\tPRIMARY KEY (\"Pilot_ID\")\n)\n/*\nColumns in pilot and all categories for low cardinality columns :\nPilot_ID : 1, 2, 5, 4, 3\nPilot_name : Patrick O'Bryant, Charles Oakley, Dan O'Sullivan, Hakeem Olajuwon, Jermaine O'Neal\nRank : 34, 13, 45, 6\nAge : 37, 33, 40, 32, 22\nNationality : United States, Nigeria, United Kindom\nPosition : Center Team, Forward Team, Forward-Center Team\nJoin_Year : 2009, 2001, 2008, 1999, 2010\nTeam : Fordham, Eau Claire 
High School, Bradley, Virginia Union, Houston\n*/\n \nCREATE TABLE flight (\n\tid INTEGER, \n\t\"Vehicle_Flight_number\" TEXT, \n\t\"Date\" TEXT, \n\t\"Pilot\" TEXT, \n\t\"Velocity\" REAL, \n\t\"Altitude\" REAL, \n\tairport_id INTEGER, \n\tcompany_id INTEGER, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(company_id) REFERENCES operate_company (id), \n\tFOREIGN KEY(airport_id) REFERENCES airport (id)\n)\n/*\nColumns in flight and 3 examples in each column for high cardinality columns :\nid : 1, 13, 11\nVehicle_Flight_number : M2-F1 #14, M2-F1 #61, M2-F1 #0\nDate : July 16, 1965, May 19, 1964, March 28, 1966\n*/\n/*\nColumns in flight and all categories for low cardinality columns :\nPilot : Thompson, Peterson\nVelocity : 240.0, 135.0\nAltitude : 3650.0, 0.0\nairport_id : 1, 2, 5, 8, 4, 6, 3, 9\ncompany_id : 1, 13, 11, 2, 5, 7, 4, 6, 3, 9\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n"}, {"role": "user", "content": "List the name, born state and age of the heads of departments ordered by age."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: head.\nFrom these tables, I'll be using the following columns: name, born_state, age.\nThe SQL query I'll be generating is:\nSELECT name , born_state , age FROM head ORDER BY age"}]} +{"messages": [{"role": "system", "content": "\nYou are an assistant that is an expert in generating sqlite SQL queries.\nHaving the access to database content, generate a correct sqlite SQL query for the given question.\n### Database content ###\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Age\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Nationality\" TEXT, \n\t\"Graduation_College\" 
TEXT, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and all categories for low cardinality columns :\nPeople_ID : 1, 2, 5, 7, 4, 6, 3\nAge : 37, 25, 30, 34, 27, 31\nName : Tom Sheehey, Reggie Lewis, Brad Lohaus, David Butler, Tim Naegeli, Darryl Kennedy, Jerry Corcoran\nNationality : United States, United Kindom\nGraduation_College : Wisconsin–Stevens Point, California, Virginia, Iowa, Northeastern, Oklahoma\n*/\n \nCREATE TABLE \"People_Addresses\" (\n\tperson_address_id INTEGER NOT NULL, \n\tperson_id INTEGER NOT NULL, \n\taddress_id INTEGER NOT NULL, \n\tdate_from DATETIME, \n\tdate_to DATETIME, \n\tPRIMARY KEY (person_address_id), \n\tFOREIGN KEY(person_id) REFERENCES \"People\" (person_id), \n\tFOREIGN KEY(address_id) REFERENCES \"Addresses\" (address_id)\n)\n/*\nColumns in People_Addresses and all categories for low cardinality columns :\nperson_address_id : 269, 276, 281, 257, 363, 396, 122, 340\nperson_id : 151, 141, 131, 171, 111, 161, 181, 121\naddress_id : 45, 9, 5, 88, 55, 92, 82, 99\ndate_from : 2013-12-26 16:57:01, 2008-07-31 02:17:25, 2012-09-26 13:21:00, 2008-05-26 20:43:41, 2014-05-10 00:32:31, 2010-11-26 05:21:12, 2017-05-01 17:32:26, 2015-05-24 16:14:12\ndate_to : 2018-03-21 09:46:30, 2018-03-08 06:16:47, 2018-02-23 22:44:18, 2018-03-11 20:26:41, 2018-03-09 02:11:12, 2018-03-09 08:45:06, 2018-03-03 16:06:17, 2018-03-12 21:10:02\n*/\n \nCREATE TABLE people (\n\t\"People_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Country\" TEXT, \n\t\"Is_Male\" TEXT, \n\t\"Age\" INTEGER, \n\tPRIMARY KEY (\"People_ID\")\n)\n/*\nColumns in people and 3 examples in each column for high cardinality columns :\nPeople_ID : 1, 11, 2\nName : Summer Duval, Annie Mediate, Juli Hanson\n*/\n/*\nColumns in people and all categories for low cardinality columns :\nCountry : Sweden, Canada, United States, Australia, Northern Ireland\nIs_Male : T, F\nAge : 26, 25, 36, 30, 34, 32, 27, 31\n*/\n \nCREATE TABLE farm_competition (\n\t\"Competition_ID\" INTEGER, \n\t\"Year\" 
INTEGER, \n\t\"Theme\" TEXT, \n\t\"Host_city_ID\" INTEGER, \n\t\"Hosts\" TEXT, \n\tPRIMARY KEY (\"Competition_ID\"), \n\tFOREIGN KEY(\"Host_city_ID\") REFERENCES city (\"City_ID\")\n)\n/*\nColumns in farm_competition and all categories for low cardinality columns :\nCompetition_ID : 1, 2, 5, 4, 6, 3\nYear : 2004, 2013, 2005, 2006, 2003, 2002\nTheme : MTV Cube, Valentine's Day, Codehunters, Carnival M is back!, Aliens, MTV Asia Aid\nHost_city_ID : 1, 2, 5, 4, 3\nHosts : Mandy Moore and Ronan Keating, Alicia Keys, Shaggy and Coco Lee, Leehom Wang and Kelly Rowland, Miley Cyrus Jared Leto and Karen Mok, Vanness Wu and Michelle Branch\n*/\n \nCREATE TABLE head (\n\t\"head_ID\" INTEGER, \n\tname TEXT, \n\tborn_state TEXT, \n\tage REAL, \n\tPRIMARY KEY (\"head_ID\")\n)\n/*\nColumns in head and all categories for low cardinality columns :\nhead_ID : 1, 2, 5, 7, 8, 4, 6, 3, 10, 9\nname : Jeff Maggert, Pádraig Harrington, Billy Mayfair, K. J. Choi, Dudley Hart, Sergio García, Stewart Cink, Tiger Woods, Nick Faldo, Franklin Langham\nborn_state : Delaware, Connecticut, Alabama, California, Florida\nage : 69.0, 67.0, 68.0, 53.0, 56.0, 52.0, 50.0, 43.0\n*/\n \nCREATE TABLE budget (\n\t\"School_id\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Budgeted\" INTEGER, \n\ttotal_budget_percent_budgeted REAL, \n\t\"Invested\" INTEGER, \n\ttotal_budget_percent_invested REAL, \n\t\"Budget_invested_percent\" TEXT, \n\tPRIMARY KEY (\"School_id\", \"Year\"), \n\tFOREIGN KEY(\"School_id\") REFERENCES \"School\" (\"School_id\")\n)\n/*\nColumns in budget and all categories for low cardinality columns :\nSchool_id : 1, 2, 5, 4, 3\nYear : 2004, 2001, 2005, 1999, 2000, 2006, 2002, 2003\nBudgeted : 113531, 3666, 61283, 119527, 6803, 4979, 74849\ntotal_budget_percent_budgeted : 1.3, 2.0, 2.4\nInvested : 8335, 5840, 95542, 146102, 2134, 85249, 140102\ntotal_budget_percent_invested : 2.7, 2.2, 2.0\nBudget_invested_percent : 71.3, 228.6, 146.9, 127.6, 228.8, 122.5, 42.9, 159.3\n*/\n \nCREATE TABLE \"Roles\" 
(\n\t\"Role_Code\" CHAR(15) NOT NULL, \n\t\"Role_Name\" VARCHAR(255), \n\t\"Role_Description\" VARCHAR(255), \n\tPRIMARY KEY (\"Role_Code\")\n)\n/*\nColumns in Roles and all categories for low cardinality columns :\nRole_Code : PR, HR, PT, MG, ED\nRole_Name : Proof Reader, Human Resource, Editor, Photo, Manager\nRole_Description : Itaque dolor ut nemo rerum vitae provident. Vel laborum ipsum velit sint. Et est omnis dignissimos., Ut sed quae eaque mollitia qui hic. Natus ea expedita et odio illum fugiat qui natus. Consequatur velit ut dolorem cum ullam esse deserunt dignissimos. Enim non non rem officiis quis., Aut modi nihil molestias temporibus sit rerum. Sit neque eaque odio omnis incidunt., Vero harum corrupti odit ipsa vero et odio. Iste et recusandae temporibus maxime. Magni aspernatur fugit quis explicabo totam esse corrupti., Et totam est quibusdam aspernatur ut. Vitae perferendis eligendi voluptatem molestiae rem ut enim. Ipsum expedita quae earum unde est. Repellendus ut ipsam nihil accusantium sit. 
Magni accusantium numquam quod et.\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\tinvoice_date DATETIME, \n\tinvoice_details VARCHAR(255), \n\tPRIMARY KEY (invoice_number)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 1987-11-15 15:41:40, 1998-07-30 15:39:12, 1974-07-27 16:18:49\ninvoice_details : doloremque, eveniet, earum\n*/\n\n \nCREATE TABLE department (\n\t\"Department_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Creation\" TEXT, \n\t\"Ranking\" INTEGER, \n\t\"Budget_in_Billions\" REAL, \n\t\"Num_Employees\" REAL, \n\tPRIMARY KEY (\"Department_ID\")\n)\n/*\nColumns in department and 3 examples in each column for high cardinality columns :\nDepartment_ID : 1, 13, 11\nName : Energy, Interior, Health and Human Services\nCreation : 1913, 1979, 1989\nRanking : 1, 13, 11\nBudget_in_Billions : 10.7, 77.6, 59.7\nNum_Employees : 112557.0, 3000000.0, 235000.0\n*/\n\n \nCREATE TABLE club_rank (\n\t\"Rank\" REAL, \n\t\"Club_ID\" INTEGER, \n\t\"Gold\" REAL, \n\t\"Silver\" REAL, \n\t\"Bronze\" REAL, \n\t\"Total\" REAL, \n\tPRIMARY KEY (\"Rank\", \"Club_ID\"), \n\tFOREIGN KEY(\"Club_ID\") REFERENCES club (\"Club_ID\")\n)\n/*\nColumns in club_rank and all categories for low cardinality columns :\nRank : 3.0, 1.0, 2.0, 6.0, 4.0, 5.0\nClub_ID : 1, 2, 5, 4, 6, 3\nGold : 8.0, 3.0, 11.0, 7.0, 2.0, 4.0\nSilver : 3.0, 11.0, 1.0, 7.0, 2.0, 4.0\nBronze : 6.0, 9.0, 0.0, 2.0\nTotal : 3.0, 12.0, 13.0, 6.0, 21.0, 31.0\n*/\n \nCREATE TABLE \"Staff\" (\n\tstaff_id INTEGER, \n\tstaff_gender VARCHAR(1), \n\tstaff_name VARCHAR(80), \n\tPRIMARY KEY (staff_id)\n)\n/*\nColumns in Staff and 3 examples in each column for high cardinality columns :\nstaff_id : 1, 13, 11\nstaff_name : Yolanda, Tom, Melody\n*/\n/*\nColumns in Staff and all categories for low cardinality columns :\nstaff_gender : 1, 0\n*/\n \nCREATE TABLE \"Problem_Log\" (\n\tproblem_log_id INTEGER, \n\tassigned_to_staff_id INTEGER NOT 
NULL, \n\tproblem_id INTEGER NOT NULL, \n\tproblem_category_code VARCHAR(20) NOT NULL, \n\tproblem_status_code VARCHAR(20) NOT NULL, \n\tlog_entry_date DATETIME, \n\tlog_entry_description VARCHAR(255), \n\tlog_entry_fix VARCHAR(255), \n\tother_log_details VARCHAR(255), \n\tPRIMARY KEY (problem_log_id), \n\tFOREIGN KEY(problem_status_code) REFERENCES \"Problem_Status_Codes\" (problem_status_code), \n\tFOREIGN KEY(problem_id) REFERENCES \"Problems\" (problem_id), \n\tFOREIGN KEY(assigned_to_staff_id) REFERENCES \"Staff\" (staff_id), \n\tFOREIGN KEY(problem_category_code) REFERENCES \"Problem_Category_Codes\" (problem_category_code)\n)\n/*\nColumns in Problem_Log and 3 examples in each column for high cardinality columns :\nproblem_log_id : 1, 13, 11\nlog_entry_date : 1997-08-31 08:19:12, 1974-09-13 00:37:26, 2011-11-12 23:30:53\nlog_entry_description : t, d, b\nlog_entry_fix : t, i, s\nother_log_details : d, f, b\n*/\n/*\nColumns in Problem_Log and all categories for low cardinality columns :\nassigned_to_staff_id : 1, 13, 11, 2, 5, 8, 4, 6, 12, 10\nproblem_id : 1, 13, 11, 10, 7, 8, 4, 12, 15\nproblem_category_code : Datatabase, Middleware, GUI\nproblem_status_code : Solved, Reported\n*/\n \nCREATE TABLE \"Invoices\" (\n\tinvoice_number INTEGER, \n\torder_id INTEGER NOT NULL, \n\tinvoice_date DATETIME, \n\tPRIMARY KEY (invoice_number), \n\tFOREIGN KEY(order_id) REFERENCES \"Orders\" (order_id)\n)\n/*\nColumns in Invoices and 3 examples in each column for high cardinality columns :\ninvoice_number : 1, 13, 11\ninvoice_date : 2018-03-16 21:57:43, 2018-03-20 00:21:41, 2018-03-05 08:47:33\n*/\n/*\nColumns in Invoices and all categories for low cardinality columns :\norder_id : 13, 11, 5, 8, 4, 12, 3, 14, 10, 9\n*/\n \nCREATE TABLE \"Customer_Events\" (\n\t\"Customer_Event_ID\" INTEGER NOT NULL, \n\tcustomer_id INTEGER, \n\tdate_moved_in DATETIME, \n\tproperty_id INTEGER, \n\tresident_id INTEGER, \n\tthing_id INTEGER NOT NULL, \n\tPRIMARY KEY (\"Customer_Event_ID\"), 
\n\tFOREIGN KEY(thing_id) REFERENCES \"Things\" (thing_id), \n\tFOREIGN KEY(customer_id) REFERENCES \"Customers\" (customer_id), \n\tFOREIGN KEY(resident_id, property_id, date_moved_in) REFERENCES \"Residents\" (resident_id, property_id, date_moved_in)\n)\n/*\nColumns in Customer_Events and 3 examples in each column for high cardinality columns :\nCustomer_Event_ID : 428, 216, 226\nresident_id : 87, 38, 37\n*/\n/*\nColumns in Customer_Events and all categories for low cardinality columns :\ncustomer_id : 79, 91, 16, 5, 4, 63, 19, 31\ndate_moved_in : 2016-09-27 12:00:00, 2015-03-27 12:00:00\nproperty_id : 954, 120, 108, 605, 629, 748, 669, 445, 879, 107\nthing_id : 1, 46, 10, 80, 2, 92, 14, 85, 68\n*/\n \nCREATE TABLE \"Type_Of_Restaurant\" (\n\t\"ResID\" INTEGER, \n\t\"ResTypeID\" INTEGER, \n\tFOREIGN KEY(\"ResID\") REFERENCES \"Restaurant\" (\"ResID\"), \n\tFOREIGN KEY(\"ResTypeID\") REFERENCES \"Restaurant_Type\" (\"ResTypeID\")\n)\n/*\nColumns in Type_Of_Restaurant and all categories for low cardinality columns :\nResID : 1, 2\nResTypeID : 1, 2\n*/\n \nCREATE TABLE discipline_enrollments (\n\t\"Campus\" INTEGER, \n\t\"Discipline\" INTEGER, \n\t\"Year\" INTEGER, \n\t\"Undergraduate\" INTEGER, \n\t\"Graduate\" INTEGER, \n\tPRIMARY KEY (\"Campus\", \"Discipline\"), \n\tFOREIGN KEY(\"Campus\") REFERENCES \"Campuses\" (\"Id\")\n)\n/*\nColumns in discipline_enrollments and 3 examples in each column for high cardinality columns :\nCampus : 1, 13, 11\nDiscipline : 19, 17, 21\nUndergraduate : 986, 65, 815\nGraduate : 56, 377, 187\n*/\n/*\nColumns in discipline_enrollments and all categories for low cardinality columns :\nYear : 2004\n*/\n \nCREATE TABLE country (\n\t\"Country_ID\" INTEGER, \n\t\"Name\" TEXT, \n\t\"Population\" INTEGER, \n\t\"Area\" INTEGER, \n\t\"Languages\" TEXT, \n\tPRIMARY KEY (\"Country_ID\")\n)\n/*\nColumns in country and all categories for low cardinality columns :\nCountry_ID : 1, 2, 3\nName : Austria, Sweden, Finland\nPopulation : 8206524, 
9047752, 5261008\nArea : 83871, 338145, 449964\nLanguages : Swedish, Finnish Swedish, German\n*/\n \nCREATE TABLE management (\n\t\"department_ID\" INTEGER, \n\t\"head_ID\" INTEGER, \n\ttemporary_acting TEXT, \n\tPRIMARY KEY (\"department_ID\", \"head_ID\"), \n\tFOREIGN KEY(\"head_ID\") REFERENCES head (\"head_ID\"), \n\tFOREIGN KEY(\"department_ID\") REFERENCES department (\"Department_ID\")\n)\n/*\nColumns in management and all categories for low cardinality columns :\ndepartment_ID : 7, 15, 2, 11\nhead_ID : 5, 4, 6, 3, 10\ntemporary_acting : Yes, No\n*/\n"}, {"role": "user", "content": "List the creation year, name and budget of each department."}, {"role": "assistant", "content": "\nTo construct the query, I'll be working with the following tables: department.\nFrom these tables, I'll be using the following columns: creation, name, budget_in_billions.\nThe SQL query I'll be generating is:\nSELECT creation , name , budget_in_billions FROM department"}]} diff --git a/gradio_ui.py b/gradio_ui.py index bab3e2d2..886a9d45 100644 --- a/gradio_ui.py +++ b/gradio_ui.py @@ -10,14 +10,15 @@ async def chat_with_agent(message, history): # Initialize agent if not already done if not hasattr(chat_with_agent, "agent"): - agent_executor, config, twitter_api_wrapper, knowledge_base = await initialize_agent() + agent_executor, config, runnable_config, twitter_api_wrapper, knowledge_base, podcast_knowledge_base = await initialize_agent() chat_with_agent.agent = agent_executor chat_with_agent.config = config + chat_with_agent.runnable_config = runnable_config # Store runnable_config as well runnable_config = RunnableConfig( - recursion_limit=config["configurable"]["recursion_limit"], + recursion_limit=chat_with_agent.config["configurable"]["recursion_limit"], configurable={ - "thread_id": config["configurable"]["thread_id"], + "thread_id": chat_with_agent.config["configurable"]["thread_id"], "checkpoint_ns": "chat_mode", "checkpoint_id": str(datetime.now().timestamp()) } diff 
--git a/hyperbolic_agentkit_core/actions/__init__.py b/hyperbolic_agentkit_core/actions/__init__.py index 10012a00..99df08c3 100644 --- a/hyperbolic_agentkit_core/actions/__init__.py +++ b/hyperbolic_agentkit_core/actions/__init__.py @@ -8,6 +8,7 @@ from hyperbolic_agentkit_core.actions.terminate_compute import TerminateComputeAction from hyperbolic_agentkit_core.actions.get_current_balance import GetCurrentBalanceAction from hyperbolic_agentkit_core.actions.link_wallet_address import LinkWalletAddressAction +from hyperbolic_agentkit_core.actions.remote_finetune_example import RunFinetuneAction # WARNING: All new HyperbolicAction subclasses must be imported above, otherwise they will not be discovered # by get_all_hyperbolic_actions(). The import ensures the class is registered as a subclass of HyperbolicAction. def get_all_hyperbolic_actions() -> list[type[HyperbolicAction]]: @@ -22,5 +23,5 @@ def get_all_hyperbolic_actions() -> list[type[HyperbolicAction]]: __all__ = [ "HYPERBOLIC_ACTIONS", "HyperbolicAction", "RentComputeAction", "GetAvailableGpusAction", - "GetGpuStatusAction", "SSHAccessAction", "RemoteShellAction", "TerminateComputeAction", "GetSpendHistoryAction", "GetCurrentBalanceAction", "LinkWalletAddressAction" + "GetGpuStatusAction", "SSHAccessAction", "RemoteShellAction", "TerminateComputeAction", "GetSpendHistoryAction", "GetCurrentBalanceAction", "LinkWalletAddressAction", "RunFinetuneAction" ] diff --git a/hyperbolic_agentkit_core/actions/remote_finetune_example.py b/hyperbolic_agentkit_core/actions/remote_finetune_example.py new file mode 100644 index 00000000..8e9cf437 --- /dev/null +++ b/hyperbolic_agentkit_core/actions/remote_finetune_example.py @@ -0,0 +1,208 @@ +import requests +import json +import os +import subprocess +from typing import Optional, Dict, Any +from collections.abc import Callable +from pydantic import BaseModel, Field +from langchain.tools import BaseTool +from langchain.callbacks.manager import CallbackManagerForToolRun + 
import shlex

from hyperbolic_agentkit_core.actions.hyperbolic_action import HyperbolicAction

from hyperbolic_agentkit_core.actions.ssh_manager import ssh_manager
from hyperbolic_agentkit_core.actions.get_gpu_status import get_gpu_status


# Local directory holding the fine-tuning assets, and the files inside it that
# must be present on the GPU instance before training can start.
_LOCAL_DIR = "./finetune_example"
_REQUIRED_FILES = (
    "training_data.jsonl",
    "requirements.txt",
    "finetune.py",
    "test_inference.py",
)

# NOTE(review): the remote checks use an absolute path under /home/ubuntu while
# rsync logs in with the username parsed from the SSH command. This presumably
# only works when that user is `ubuntu` -- confirm before supporting other images.
_REMOTE_DIR = "/home/ubuntu/finetune_example"

# Prompt passed to test_inference.py once training has finished.
_TEST_PROMPT = "What can you tell me about LLMs?"


class RunFinetuneInput(BaseModel):
    """Input argument schema for fine-tuning action."""

    model_name: str = Field(
        ...,
        description="The name of the base model to fine-tune"
    )


def _error_response(message: str) -> str:
    """Return the JSON error payload used by ``run_finetune``."""
    return json.dumps({"status": "error", "message": message})


def _parse_ssh_command(ssh_command: str) -> tuple[str, int, str]:
    """Split an ``ssh user@host -p port`` command into its connection parts.

    Args:
        ssh_command: The ``sshCommand`` string of a GPU instance. The second
            token must be ``user@host`` and the fourth token the port.

    Returns:
        A ``(host, port, username)`` tuple with ``port`` converted to ``int``.

    Raises:
        ValueError: If the command does not have the expected shape.
    """
    try:
        parts = ssh_command.split()
        username, separator, host = parts[1].partition("@")
        port = int(parts[3])
    except (AttributeError, IndexError, ValueError) as exc:
        raise ValueError(f"unexpected SSH command format: {ssh_command!r}") from exc
    if not separator or not host:
        raise ValueError(f"unexpected SSH command format: {ssh_command!r}")
    return host, port, username


def _build_training_command(model_name: str, prompt: str = _TEST_PROMPT) -> str:
    """Build the remote shell command that installs, trains and tests the model.

    Everything after ``cd finetune_example`` runs inside a single ``bash -c``
    so the virtualenv activated by ``source`` stays active for the later steps.
    The model name, the prompt and the whole inner script are quoted with
    ``shlex.quote``: the prompt contains spaces and a ``?``, and nesting plain
    single quotes inside ``bash -c '...'`` would end the script at the first
    inner quote and truncate the prompt to its first word.
    """
    script = " && ".join((
        "python3 -m venv venv",
        "source venv/bin/activate",
        "pip install -r requirements.txt",
        f"FINE_TUNE_MODEL={shlex.quote(model_name)} python3 finetune.py",
        f"python3 test_inference.py {shlex.quote(prompt)}",
    ))
    return f"cd finetune_example && bash -c {shlex.quote(script)}"


def run_finetune(model_name: str) -> str:
    """Run the example finetune action workflow.

    Uses the first instance reported by ``get_gpu_status``: connects over SSH,
    installs system packages, syncs the local fine-tuning files, runs training
    plus a test inference, and finally checks that ``finetuned_model`` exists
    on the remote side.

    Args:
        model_name (str): The name of the base model to fine-tune

    Returns:
        str: JSON string containing the status and results of the fine-tuning
        process. Failures are reported as ``{"status": "error", ...}`` rather
        than raised.
    """
    try:
        # Step 0: Get GPU status and derive the SSH connection details.
        gpu_status = get_gpu_status()
        if not gpu_status.get("instances"):
            return _error_response("No active GPU instances found")

        host, port, username = _parse_ssh_command(
            gpu_status["instances"][0]["sshCommand"]
        )

        # Step 1: Establish SSH connection first.
        ssh_result = ssh_manager.connect(host=host, port=port, username=username)
        if isinstance(ssh_result, str) and "Error" in ssh_result:
            return _error_response(f"Failed to connect to remote GPU: {ssh_result}")

        # Step 2: Setup remote environment.
        shell_result = ssh_manager.execute(
            "sudo apt-get update && "
            "sudo apt-get install -y rsync python3-dev python3-pip build-essential git cmake pkg-config nano"
        )
        apt_failures = (
            "E: Unable to locate package",
            "E: Failed to fetch",
            "E: Could not install",
        )
        if isinstance(shell_result, str) and any(
            marker in shell_result for marker in apt_failures
        ):
            return _error_response(
                f"Failed to install required packages: {shell_result}"
            )

        # Step 3: Sync files to remote GPU.
        sync_result = sync_to_remote()
        if isinstance(sync_result, dict) and not sync_result.get("success", False):
            return _error_response(sync_result.get("error", "Sync failed"))

        # Step 4: Execute setup, training, and test inference in a single shell session.
        shell_result = ssh_manager.execute(_build_training_command(model_name))

        # The command output is not inspected directly; the presence of the
        # finetuned_model directory is the success signal.
        verify_result = ssh_manager.execute(
            f"test -d {_REMOTE_DIR}/finetuned_model && echo 'exists'"
        )
        if "exists" not in str(verify_result):
            return _error_response(
                f"Fine-tuning failed or directory not created. Output: {shell_result}"
            )

        # Get the inference output written by test_inference.py.
        inference_output = ssh_manager.execute(
            f"cat {_REMOTE_DIR}/inference_output.json"
        )
        try:
            inference_result = json.loads(inference_output)
        except (TypeError, ValueError):
            # Missing or malformed output should not turn a finished training
            # run into a failure, so report it inside the success payload.
            inference_result = {"error": "Failed to parse inference output"}

        return json.dumps({
            "status": "success",
            "message": "Fine-tuning completed successfully",
            "model_name": model_name,
            "test_inference_output": inference_result
        })

    except Exception as e:
        # Tool boundary: the agent expects a JSON string, never an exception.
        return _error_response(str(e))


class RunFinetuneAction(HyperbolicAction):
    """Run the example finetune action workflow."""

    name: str = "run_finetune"
    description: str = """This tool will execute fine-tuning of an AI model on Hyperbolic's GPU infrastructure.
    It takes the model name as input (e.g., "unsloth/mistral-7b-v0.3-bnb-4bit").
    The model will be fine-tuned using unsloth and set up for local inference using vLLM.
    Training data should be prepared in advance in the finetune_example/training_data.jsonl file."""
    args_schema: type[BaseModel] = RunFinetuneInput
    return_direct: bool = False
    func: Callable[..., str] = run_finetune


def sync_to_remote() -> Dict[str, Any]:
    """Syncs local files to remote GPU for fine-tuning.

    Copies every file in ``_REQUIRED_FILES`` from ``_LOCAL_DIR`` to the first
    instance reported by ``get_gpu_status`` using ``rsync`` over SSH, then
    verifies each file exists remotely through ``ssh_manager``.

    Returns:
        ``{"success": True, "message": ...}`` when all files were copied and
        verified, otherwise ``{"success": False, "error": ...}``. Errors are
        reported in the result rather than raised.
    """
    try:
        # Get GPU status and SSH details.
        gpu_status = get_gpu_status()
        if not gpu_status.get("instances"):
            return {"success": False, "error": "No active GPU instances found"}

        ssh_command = gpu_status["instances"][0]["sshCommand"]
        try:
            host, port, username = _parse_ssh_command(ssh_command)
        except ValueError as e:
            return {"success": False, "error": f"Failed to parse SSH command: {str(e)}"}

        # Fail before touching the remote side if anything is missing locally.
        for file_name in _REQUIRED_FILES:
            local_file = f"{_LOCAL_DIR}/{file_name}"
            if not os.path.exists(local_file):
                return {
                    "success": False,
                    "error": f"Required file not found: {local_file}"
                }

        # Create base directory on remote.
        result = ssh_manager.execute("mkdir -p ~/finetune_example")
        if isinstance(result, str) and "error" in result.lower():
            return {
                "success": False,
                "error": f"Failed to create base directory: {result}"
            }

        # Sync each file, stopping at the first rsync failure.
        for file_name in _REQUIRED_FILES:
            local_file = f"{_LOCAL_DIR}/{file_name}"
            remote_path = f"{_REMOTE_DIR}/{file_name}"

            result = subprocess.run(
                [
                    "rsync", "-avz",
                    "-e", f"ssh -p {port}",
                    local_file,
                    f"{username}@{host}:{remote_path}",
                ],
                capture_output=True,
                text=True,
                check=False,
            )
            if result.returncode != 0:
                return {
                    "success": False,
                    "error": f"Failed to sync {local_file}: {result.stderr}"
                }

        # Verify files were synced correctly.
        for file_name in _REQUIRED_FILES:
            remote_path = f"{_REMOTE_DIR}/{file_name}"
            check_result = ssh_manager.execute(f"test -f {remote_path} && echo 'exists'")
            if "exists" not in str(check_result):
                return {
                    "success": False,
                    "error": f"Failed to verify file sync for: {remote_path}"
                }

        return {
            "success": True,
            "message": "All files synced successfully"
        }

    except Exception as e:
        return {
            "success": False,
            "error": f"Unexpected error during file sync: {str(e)}"
        }


# ---------------------------------------------------------------------------
# Remainder of the patch that shares the last physical line of this chunk,
# kept as comments (line breaks restored) so no patch content is lost:
#
# diff --git a/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py b/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py
# index 3d07d661..4b422534 100644
# --- a/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py
# +++ b/hyperbolic_langchain/agent_toolkits/hyperbolic_toolkit.py
# @@ -57,6 +57,7 @@ class HyperbolicToolkit(BaseToolkit):
#   get_current_balance
#   ssh_access
#   link_wallet_address
# + run_remote_finetune
#   Use within an agent:
#   .. code-block:: python
#
# NOTE(review): the action defined above registers as "run_finetune", while the
# toolkit docstring hunk lists "run_remote_finetune" -- confirm which is intended.