diff --git a/fern/docs.yml b/fern/docs.yml index fc8aa949..5461287f 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -239,7 +239,6 @@ navigation: - page: Change model and parameters path: pages/04-lemur/customize-parameters.mdx slug: /customize-parameters - hidden: true - page: Apply LLMs to audio files path: pages/04-lemur/apply-llms-to-audio-files.mdx slug: /apply-llms-to-audio-files @@ -253,7 +252,6 @@ navigation: - page: Create custom LLM prompts path: pages/04-lemur/examples.mdx slug: /examples - hidden: true - page: Prompt engineering path: pages/04-lemur/improving-your-prompt.mdx slug: /improving-your-prompt @@ -697,22 +695,9 @@ navigation: - page: Prompt A Structured Q&A Response Using LLM Gateway path: pages/05-guides/cookbooks/lemur/task-endpoint-structured-QA.mdx slug: task-endpoint-structured-QA - hidden: true - page: Estimate Input Token Costs for LLM Gateway path: pages/05-guides/cookbooks/lemur/counting-tokens.mdx slug: counting-tokens - hidden: true - - section: Analyze Speakers with LLM Gateway - skip-slug: true - contents: - - page: Process Speaker Labels with LLM Gateways Custom Text Input Parameter - path: pages/05-guides/cookbooks/lemur/input-text-speaker-labels.mdx - slug: input-text-speaker-labels - hidden: true - - page: Identify Speaker Names From the Transcript Using LeMUR - path: pages/05-guides/cookbooks/lemur/speaker-identification.mdx - slug: speaker-identification - hidden: true - section: Get quotes and citations with LLM Gateway skip-slug: true contents: @@ -729,18 +714,12 @@ navigation: - page: Generate Transcript Citations using LLM Gateway path: pages/05-guides/cookbooks/lemur/lemur-transcript-citations.mdx slug: lemur-transcript-citations - hidden: true - section: Substitute Audio Intelligence with LLM Gateway skip-slug: true contents: - page: Analyze The Sentiment Of A Customer Call using LLM Gateway path: pages/05-guides/cookbooks/lemur/call-sentiment-analysis.mdx slug: call-sentiment-analysis - hidden: true - - page: Boost 
Transcription Accuracy with LLM Gateway (LLM Gateway Custom Vocab) - path: pages/05-guides/cookbooks/lemur/custom-vocab-lemur.mdx - slug: custom-vocab-lemur - hidden: true - page: Create Chapter Summaries with LeMURs Custom Text Input Parameter path: pages/05-guides/cookbooks/lemur/input-text-chapters.mdx slug: input-text-chapters @@ -760,10 +739,6 @@ navigation: - page: Implement a Sales Playbook Using LLM Gateway path: pages/05-guides/cookbooks/lemur/sales-playbook.mdx slug: sales-playbook - - page: Pass Context from Previous LLM Gateway Requests - path: pages/05-guides/cookbooks/lemur/past-response-prompts.mdx - slug: past-response-prompts - hidden: true - page: Generate Meeting Action Items with LeMUR path: pages/05-guides/generate-meeting-action-items-with-lemur.mdx slug: generate-meeting-action-items-with-lemur @@ -799,6 +774,22 @@ navigation: href: /docs/guides/speechmatics_to_aai_streaming # Legacy guides + - page: Process Speaker Labels with LeMURs Custom Text Input Parameter + path: pages/05-guides/cookbooks/lemur/input-text-speaker-labels.mdx + slug: input-text-speaker-labels + hidden: true + - page: Identify Speaker Names From the Transcript Using LeMUR + path: pages/05-guides/cookbooks/lemur/speaker-identification.mdx + slug: speaker-identification + hidden: true + - page: Pass Context from Previous LeMUR Requests + path: pages/05-guides/cookbooks/lemur/past-response-prompts.mdx + slug: past-response-prompts + hidden: true + - page: Boost Transcription Accuracy with LeMUR (LeMUR Custom Vocab) + path: pages/05-guides/cookbooks/lemur/custom-vocab-lemur.mdx + slug: custom-vocab-lemur + hidden: true - page: Automatic Language Detection Workflow path: pages/05-guides/automatic-language-detection-workflow.mdx hidden: true diff --git a/fern/pages/05-guides/cookbooks/lemur/call-sentiment-analysis.mdx b/fern/pages/05-guides/cookbooks/lemur/call-sentiment-analysis.mdx index 05fcfae6..8e94cc47 100644 --- 
a/fern/pages/05-guides/cookbooks/lemur/call-sentiment-analysis.mdx +++ b/fern/pages/05-guides/cookbooks/lemur/call-sentiment-analysis.mdx @@ -1,133 +1,308 @@ --- -title: "Analyze The Sentiment Of A Customer Call using LeMUR" +title: "Analyze The Sentiment Of A Customer Call using LLM Gateway" --- -In this guide, we'll show you how to use AssemblyAI's LeMUR (Leveraging Large Language Models to Understand Recognized Speech) framework to process an audio file and then use [LeMUR's Question & Answer feature](https://www.assemblyai.com/docs/api-reference/lemur) to automatically detect sentiment analysis from customer calls as "positive", "negative", or "neutral". In addition, we will glean additional insights beyond these three sentiments and learn the reasoning behind these detected sentiments. +In this guide, we'll show you how to use AssemblyAI's LLM Gateway framework to process an audio file and then use [LLM Gateway](/docs/llm-gateway) to automatically detect the sentiment of customer calls as "positive", "negative", or "neutral". In addition, we will glean additional insights beyond these three sentiments and learn the reasoning behind these detected sentiments.
## Quickstart ```python -import assemblyai as aai +import requests +import time +import json -aai.settings.api_key = "YOUR_API_KEY" +API_KEY = "YOUR_API_KEY" +audio_file_path = "./meeting.mp3" -transcriber = aai.Transcriber() -transcript = transcriber.transcribe("./meeting.mp3") # You can also provide a URL to a publicly available audio file +# ------------------------------------------ +# Step 1: Upload the audio file +# ------------------------------------------ +def upload_file(filename): + with open(filename, "rb") as f: + upload_url = "https://api.assemblyai.com/v2/upload" + headers = {"authorization": API_KEY} + response = requests.post(upload_url, headers=headers, data=f) + response.raise_for_status() + return response.json()["upload_url"] +audio_url = upload_file(audio_file_path) +print(f"Uploaded audio file. URL: {audio_url}") + +# ------------------------------------------ +# Step 2: Request transcription +# ------------------------------------------ +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) + +transcript_id = transcript_request.json()["id"] + +# Poll until completed +while True: + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] + break + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") + else: + print(f"Transcription status: {status}") + time.sleep(3) + +print("\nTranscription complete.\n") + +# ------------------------------------------ +# Step 3: Define questions +# ------------------------------------------ agent_context = "The agent is trying to get the customer to go through with the update to their car." 
customer_context = "The customer is calling to check how much it would cost to update the map in his car." answer_format = " " questions = [ - aai.LemurQuestion( - question="What was the overall sentiment of the call?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What was the sentiment of the agent in this call?", - context=agent_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What was the sentiment of the customer in this call?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What quote best demonstrates the customer's level of interest?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="Provide a quote from the agent that demonstrates their level of enthusiasm.", - context=agent_context, - answer_format=answer_format, - ), + { + "question": "What was the overall sentiment of the call?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "What was the sentiment of the agent in this call?", + "context": agent_context, + "answer_format": answer_format, + }, + { + "question": "What was the sentiment of the customer in this call?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "What quote best demonstrates the customer's level of interest?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "Provide a quote from the agent that demonstrates their level of enthusiasm.", + "context": agent_context, + "answer_format": answer_format, + }, ] -result = transcript.lemur.question(questions, final_model=aai.LemurModel.claude3_5_sonnet) +# ------------------------------------------ +# Step 4: Build prompt for the LLM +# ------------------------------------------ +question_strs = [] +for q in questions: + q_str = f"Question: {q['question']}" + if q.get("context"): + q_str += f"\nContext: 
{q['context']}" + if q.get("answer_format"): + q_str += f"\nAnswer Format: {q['answer_format']}" + question_strs.append(q_str) + +questions_prompt = "\n\n".join(question_strs) + +prompt = f""" +You are an expert at analyzing call transcripts. +Given the series of questions below, answer them accurately and concisely. +When context or answer format is provided, use it to guide your answers. + +Transcript: +{transcript_text} -for qa in result.response: - print(qa.question) - print(qa.answer) +Questions: +{questions_prompt} +""" + +# ------------------------------------------ +# Step 5: Query the LLM Gateway +# ------------------------------------------ +headers = {"authorization": API_KEY} + +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2000, + }, +) + +response_json = response.json() +llm_output = response_json["choices"][0]["message"]["content"] + +# ------------------------------------------ +# Step 6: Parse and display the results +# ------------------------------------------ +print("\n--- LLM Responses ---\n") +print(llm_output) ``` ## Get Started Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up for an AssemblyAI account](https://www.assemblyai.com/app) and get your API key from your dashboard. -LeMUR features are currently only available to paid users at two pricing tiers: LeMUR and LeMUR Basic. See [pricing](https://www.assemblyai.com/pricing#:~:text=LeMUR) for more detail. +See our [pricing page](https://www.assemblyai.com/pricing#pricing_llm-gateway) for LLM Gateway pricing rates. + ## Step-by-Step Instructions In this guide, we will ask five questions to learn about the sentiment of the customer and agent. You can adjust the questions to suit your project's needs. -Import the `assemblyai` package and set your API key. 
+Start by importing the required libraries and setting your AssemblyAI API key. ```python -import assemblyai as aai +import requests +import time +import json -aai.settings.api_key = "your_api_key" +API_KEY = "YOUR_API_KEY" ``` -Use the `Transcriber` object's `transcribe` method and pass in the audio file's path as a parameter. The `transcribe` method will save the results of the transcription to the `Transcriber` object's `transcript` attribute. +Next, you’ll upload your audio file to AssemblyAI’s servers. +Once the upload is complete, the API will return a temporary URL that can be used to start the transcription. +After submitting the transcription request, your script will poll the API until the transcription is finished. ```python -transcriber = aai.Transcriber() -transcript = transcriber.transcribe("./meeting.mp3") +audio_file_path = "./meeting.mp3" +# ------------------------------------------ +# Step 1: Upload the audio file +# ------------------------------------------ +def upload_file(filename): + with open(filename, "rb") as f: + upload_url = "https://api.assemblyai.com/v2/upload" + headers = {"authorization": API_KEY} + response = requests.post(upload_url, headers=headers, data=f) + response.raise_for_status() + return response.json()["upload_url"] +audio_url = upload_file(audio_file_path) +print(f"Uploaded audio file. 
URL: {audio_url}") +# ------------------------------------------ +# Step 2: Request transcription +# ------------------------------------------ +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) +transcript_id = transcript_request.json()["id"] +# Poll until completed +while True: + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + if status == "completed": + transcript_text = polling_response.json()["text"] + break + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") + else: + print(f"Transcription status: {status}") + time.sleep(3) +print("\nTranscription complete.\n") ``` -Define your LeMUR request `context` parameters for the Question & Answer feature. +Once you have the transcript, you’ll define short context strings for both the agent and the customer. +These will help the model better understand the roles and perspectives in the conversation. ```python agent_context = "The agent is trying to get the customer to go through with the update to their car." customer_context = "The customer is calling to check how much it would cost to update the map in his car." ``` -Define your `answer_format` and `questions` parameters for the Question & Answer feature. +You can now specify the exact questions you want the LLM Gateway to answer. +Each question can include optional context and an answer format that tells the model how to structure its response. 
```python answer_format = " " - questions = [ - aai.LemurQuestion( - question="What was the overall sentiment of the call?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What was the sentiment of the agent in this call?", - context=agent_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What was the sentiment of the customer in this call?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="What quote best demonstrates the customer's level of interest?", - context=customer_context, - answer_format=answer_format, - ), - aai.LemurQuestion( - question="Provide a quote from the agent that demonstrates their level of enthusiasm.", - context=agent_context, - answer_format=answer_format, - ), + { + "question": "What was the overall sentiment of the call?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "What was the sentiment of the agent in this call?", + "context": agent_context, + "answer_format": answer_format, + }, + { + "question": "What was the sentiment of the customer in this call?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "What quote best demonstrates the customer's level of interest?", + "context": customer_context, + "answer_format": answer_format, + }, + { + "question": "Provide a quote from the agent that demonstrates their level of enthusiasm.", + "context": agent_context, + "answer_format": answer_format, + }, ] ``` -Run the `question` method on `transcript` and print the result to your terminal. +Now that the questions are defined, combine them into a single formatted prompt. +This prompt includes both the call transcript and the questions you want the model to address. +The model will use these details to generate accurate and concise responses. 
```python -result = transcript.lemur.question(questions, final_model=aai.LemurModel.claude3_5_sonnet) - -for qa in result.response: - print(qa.question) - print(qa.answer) +# ------------------------------------------ +# Step 4: Build prompt for the LLM +# ------------------------------------------ +question_strs = [] +for q in questions: + q_str = f"Question: {q['question']}" + if q.get("context"): + q_str += f"\nContext: {q['context']}" + if q.get("answer_format"): + q_str += f"\nAnswer Format: {q['answer_format']}" + question_strs.append(q_str) +questions_prompt = "\n\n".join(question_strs) +prompt = f""" +You are an expert at analyzing call transcripts. +Given the series of questions below, answer them accurately and concisely. +When context or answer format is provided, use it to guide your answers. +Transcript: +{transcript_text} +Questions: +{questions_prompt} +""" ``` -The output will look similar to the example below. +With the prompt prepared, query LLM Gateway then extract and print the answers returned by the LLM Gateway. +This step displays the model’s assessment of each question, including the identified sentiments and their reasoning. 
+ +```python +# ------------------------------------------ +# Step 5: Query the LLM Gateway +# ------------------------------------------ +headers = {"authorization": API_KEY} +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2000, + }, +) +response_json = response.json() +llm_output = response_json["choices"][0]["message"]["content"] +# ------------------------------------------ +# Step 6: Parse and display the results +# ------------------------------------------ +print("\n--- LLM Responses ---\n") +print(llm_output) +``` \ No newline at end of file diff --git a/fern/pages/05-guides/cookbooks/lemur/task-endpoint-action-items.mdx b/fern/pages/05-guides/cookbooks/lemur/task-endpoint-action-items.mdx index 0299817a..7625443d 100644 --- a/fern/pages/05-guides/cookbooks/lemur/task-endpoint-action-items.mdx +++ b/fern/pages/05-guides/cookbooks/lemur/task-endpoint-action-items.mdx @@ -2,7 +2,7 @@ title: "Generate Action Items with LLM Gateway" --- -This tutorial will demonstrate how to use AssemblyAI's [LLM Gateway](/docs/llm-gateway) framework to create action items from a transcript. LLM Gateway provides access to multiple LLM providers through a unified API. +This tutorial will demonstrate how to use AssemblyAI's [LLM Gateway](/docs/llm-gateway) framework to create action items from a transcript. 
## Quickstart @@ -10,147 +10,130 @@ This tutorial will demonstrate how to use AssemblyAI's [LLM Gateway](/docs/llm-g import requests import time -base_url = "https://api.assemblyai.com" -headers = {"authorization": ""} - -# Use a publicly-accessible URL: +API_KEY = "YOUR_API_KEY" audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4" -# with open("/your_audio_file.mp3", "rb") as f: -# response = requests.post(base_url + "/v2/upload", headers=headers, data=f) -# if response.status_code != 200: -# print(f"Error: {response.status_code}, Response: {response.text}") -# response.raise_for_status() -# upload_json = response.json() -# audio_url = upload_json["upload_url"] - -data = { - "audio_url": audio_url, -} - -response = requests.post(base_url + "/v2/transcript", headers=headers, json=data) - -if response.status_code != 200: - print(f"Error: {response.status_code}, Response: {response.text}") +# Step 1: Upload or provide audio URL and start transcription +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) -transcript_json = response.json() -transcript_id = transcript_json["id"] -polling_endpoint = f"{base_url}/v2/transcript/{transcript_id}" +transcript_id = transcript_request.json()["id"] +# Step 2: Poll until transcription completes while True: - transcript = requests.get(polling_endpoint, headers=headers).json() - if transcript["status"] == "completed": - print(transcript['id']) - print(f" \nFull Transcript: \n\n{transcript['text']}\n") - + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] break - elif transcript["status"] == "error": - raise RuntimeError(f"Transcription failed: 
{transcript['error']}") + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") else: + print(f"Transcription status: {status}") time.sleep(3) -prompt = f""" - Here are guidelines to follow: - - You are an expert at understanding transcripts of conversations, calls and meetings. - - You are an expert at coming up with ideal action items based on the contents of the transcripts. - - Action items are things that the transcript implies should get done. - - Your action item ideas do not make stuff up that isn't relevant to the transcript. - - You do not needlessly make up action items - you stick to important tasks. - - You are useful, true and concise, and write in perfect English. - - Your action items can be tied back to direct quotes in the transcript. - - You do not cite the quotes the action items relate to. - - The action items are written succinctly. - - Please give useful action items based on the transcript. - - Your response should be formatted in bullet points. - """ - -llm_gateway_data = { - "model": "claude-sonnet-4-5-20250929", - "messages": [ - { - "role": "user", - "content": f"{prompt} Please give useful action items based on this transcript: \n\n{transcript["text"]}." - } - ], - "max_tokens": 1500, - "temperature": 0 - } +# Step 3: Build the prompt +prompt = """ +Here are guidelines to follow: +- You are an expert at understanding transcripts of conversations, calls and meetings. +- You are an expert at coming up with ideal action items based on the contents of the transcripts. +- Action items are things that the transcript implies should get done. +- Your action item ideas do not make stuff up that isn't relevant to the transcript. +- You do not needlessly make up action items - you stick to important tasks. +- You are useful, true and concise, and write in perfect English. +- Your action items can be tied back to direct quotes in the transcript. 
+- You do not cite the quotes the action items relate to. +- The action items are written succinctly. +- Please give useful action items based on the transcript. +""" + +answer_format = "Bullet Points" +if answer_format: + prompt += f"\nYour response should have the following format: {answer_format}" + +# Step 4: Send transcript text to LLM Gateway +headers = {"authorization": API_KEY} response = requests.post( - "https://llm-gateway.assemblyai.com/v1/chat/completions", - headers=headers, - json=llm_gateway_data + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [ + { + "role": "user", + "content": f"{prompt}\n\nTranscript:\n{transcript_text}", + } + ], + "max_tokens": 1000, + }, ) -result = response.json() - -if "error" in result: - print(f"\nError from LLM Gateway: {result['error']}") -else: - response_text = result['choices'][0]['message']['content'] - print(f"\nResponse ID: {result["request_id"]}\n") - print(response_text) +# Step 5: Print the LLM-generated action items +response_json = response.json() +print(response_json["choices"][0]["message"]["content"]) ``` ## Getting Started Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up for an AssemblyAI account](https://www.assemblyai.com/app) and get your API key from your dashboard. -Find more details on the current LLM Gateway pricing on the AssemblyAI [pricing page](https://www.assemblyai.com/pricing). +Find more details on the current LLM Gateway pricing on the AssemblyAI [pricing page](https://www.assemblyai.com/pricing). ## Step-by-Step Instructions -In this guide, we'll prompt LLM Gateway to create actions items based on a transcript. +In this guide, we will prompt LLM Gateway to generate action items from our transcript. + -Import the required pagackes and set the base URL and headers. +First, we'll import the necessary libraries and set our AssemblyAI API key.
```python import requests import time -base_url = "https://api.assemblyai.com" -headers = {"authorization": ""} +API_KEY = "YOUR_API_KEY" ``` Use AssemblyAI to transcribe a file and save the transcript. ```python +# Step 1: Upload or provide audio URL and start transcription audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4" -# with open("/your_audio_file.mp3", "rb") as f: -# response = requests.post(base_url + "/v2/upload", headers=headers, data=f) -# if response.status_code != 200: -# print(f"Error: {response.status_code}, Response: {response.text}") -# response.raise_for_status() -# upload_json = response.json() -# audio_url = upload_json["upload_url"] - -data = { - "audio_url": audio_url, -} - -response = requests.post(base_url + "/v2/transcript", headers=headers, json=data) - -if response.status_code != 200: - print(f"Error: {response.status_code}, Response: {response.text}") +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) -transcript_json = response.json() -transcript_id = transcript_json["id"] -polling_endpoint = f"{base_url}/v2/transcript/{transcript_id}" +transcript_id = transcript_request.json()["id"] +# Step 2: Poll until transcription completes while True: - transcript = requests.get(polling_endpoint, headers=headers).json() - if transcript["status"] == "completed": - print(transcript['id']) - print(f" \nFull Transcript: \n\n{transcript['text']}\n") - + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] break - elif transcript["status"] == "error": - raise RuntimeError(f"Transcription failed: {transcript['error']}") + elif status == "error": + raise RuntimeError(f"Transcription 
failed: {polling_response.json()['error']}") else: + print(f"Transcription status: {status}") time.sleep(3) ``` -Define your detailed prompt instructions for generating action items based on the transcript. This is an example prompt, which you can modify to suit your specific requirements. + +Provide detailed instructions to prompt LLM Gateway to create action items from the transcript. ```python prompt = f""" @@ -172,33 +155,35 @@ prompt = f""" Generate the custom action items using LLM Gateway. ```python -llm_gateway_data = { - "model": "claude-sonnet-4-5-20250929", - "messages": [ - { - "role": "user", - "content": f"{prompt} Please give useful action items based on this transcript: \n\n{transcript["text"]}." - } - ], - "max_tokens": 1500 - } +answer_format = "Bullet Points" +if answer_format: + prompt += f"\nYour response should have the following format: {answer_format}" +``` + +Prompt LLM Gateway using the transcript results and return the response. + +```python +# Step 4: Send transcript text to LLM Gateway +headers = {"authorization": API_KEY} response = requests.post( - "https://llm-gateway.assemblyai.com/v1/chat/completions", - headers=headers, - json=llm_gateway_data + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [ + { + "role": "user", + "content": f"{prompt}\n\nTranscript:\n{transcript_text}", + } + ], + "max_tokens": 1000, + }, ) -``` -Finally, save and return the LLM response. 
+# Step 5: Print the LLM-generated action items +response_json = response.json() +print(response_json["choices"][0]["message"]["content"]) + -```python -result = response.json() - -if "error" in result: - print(f"\nError from LLM Gateway: {result['error']}") -else: - response_text = result['choices'][0]['message']['content'] - print(f"\nResponse ID: {result["request_id"]}\n") - print(response_text) ``` diff --git a/fern/pages/05-guides/cookbooks/lemur/task-endpoint-structured-QA.mdx b/fern/pages/05-guides/cookbooks/lemur/task-endpoint-structured-QA.mdx index 01bd9a0a..a57a0ad3 100644 --- a/fern/pages/05-guides/cookbooks/lemur/task-endpoint-structured-QA.mdx +++ b/fern/pages/05-guides/cookbooks/lemur/task-endpoint-structured-QA.mdx @@ -1,67 +1,105 @@ --- -title: "Prompt A Structured Q&A Response Using LeMUR" +title: "Prompt A Structured Q&A Response Using LLM Gateway" --- -This Colab will demonstrate how to use AssemblyAI's [LeMUR](https://www.assemblyai.com/blog/lemur/) (Leveraging Large Language Models to Understand Recognized Speech) framework to prompt a structured Question and Answer response using the Task Endpoint. - +This cookbook will demonstrate how to use AssemblyAI's [LLM Gateway](/docs/llm-gateway) framework to prompt a structured question and answer response. 
## Quickstart ```python -import assemblyai as aai +import requests +import time import xml.etree.ElementTree as ET -aai.settings.api_key = "YOUR_API_KEY" +API_KEY = "YOUR_API_KEY" audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4" -transcript = aai.Transcriber().transcribe(audio_url) +# ------------------------------- +# Step 1: Transcribe the audio +# ------------------------------- +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) +transcript_id = transcript_request.json()["id"] + +# Poll for completion +while True: + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] + break + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") + else: + print(f"Transcription status: {status}") + time.sleep(3) + +# ------------------------------- +# Step 2: Build question helper functions +# ------------------------------- def construct_question(question): - question_str = f"Question: {question.question}" + question_str = f"Question: {question['question']}" - if question.context: - question_str += f"\nContext: {question.context}" + if question.get("context"): + question_str += f"\nContext: {question['context']}" - # Set default answer_format to "short sentence" if not provided - if not question.answer_format: - question.answer_format = "short sentence" + # Default answer_format + if not question.get("answer_format"): + question["answer_format"] = "short sentence" - question_str += f"\nAnswer Format: {question.answer_format}" + question_str += f"\nAnswer Format: {question['answer_format']}" - if question.answer_options: - options_str = ", 
".join(question.answer_options) + if question.get("answer_options"): + options_str = ", ".join(question["answer_options"]) question_str += f"\nOptions: {options_str}" return question_str + "\n" + def escape_xml_characters(xml_string): - return xml_string.replace('&', '&') + return xml_string.replace("&", "&") + +# ------------------------------- +# Step 3: Define questions +# ------------------------------- questions = [ - aai.LemurQuestion( - question="What are the top level KPIs for engineering?", - context="KPI stands for key performance indicator", - answer_format="short sentence"), - aai.LemurQuestion( - question="How many days has it been since the data team has gotten updated metrics?", - answer_options=["1", "2", "3", "4", "5", "6", "7", "more than 7"]), - aai.LemurQuestion( - question="What are the future plans for the project?") + { + "question": "What are the top level KPIs for engineering?", + "context": "KPI stands for key performance indicator", + "answer_format": "short sentence", + }, + { + "question": "How many days has it been since the data team has gotten updated metrics?", + "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"], + }, + {"question": "What are the future plans for the project?"}, ] -question_str = '\n'.join(construct_question(q) for q in questions) +question_str = "\n".join(construct_question(q) for q in questions) +# ------------------------------- +# Step 4: Build the LLM prompt +# ------------------------------- prompt = f"""You are an expert at giving accurate answers to questions about texts. - No preamble. - Given the series of questions, answer the questions. - Each question may follow up with answer format, answer options, and context for each question. - It is critical that you follow the answer format and answer options for each question. - When context is provided with a question, refer to it when answering the question. - You are useful, true and concise, and write in perfect English. 
- Only the question is allowed between the tag. Do not include the answer format, options, or question context in your response. - Only text is allowed between the and tags. - XML tags are not allowed between the and tags. - End your response with a closing tag. - For each question-answer pair, format your response according to the template provided below: +No preamble. +Given the series of questions, answer the questions. +Each question may follow up with answer format, answer options, and context for each question. +It is critical that you follow the answer format and answer options for each question. +When context is provided with a question, refer to it when answering the question. +You are useful, true and concise, and write in perfect English. +Only the question is allowed between the tag. Do not include the answer format, options, or question context in your response. +Only text is allowed between the and tags. +XML tags are not allowed between the and tags. +End your response with a closing tag. 
+For each question-answer pair, format your response according to the template provided below: Template for response: @@ -77,47 +115,245 @@ Template for response: These are the questions: {question_str} + +Transcript: +{transcript_text} """ -result = transcript.lemur.task( - prompt, - final_model=aai.LemurModel.claude3_5_sonnet +# ------------------------------- +# Step 5: Query LLM Gateway +# ------------------------------- +headers = {"authorization": API_KEY} + +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2000, + }, ) -response = result.response - -# Escape special XML characters and strip any leading/trailing whitespace -clean_response = escape_xml_characters(response).strip() -root = ET.fromstring(clean_response) -for response in root.findall('response'): - question = response.find('question').text - answer = response.find('answer').text - print(f"Question: {question}") - print(f"Answer: {answer}") +response_json = response.json() +llm_output = response_json["choices"][0]["message"]["content"] + +# ------------------------------- +# Step 6: Parse and print XML response +# ------------------------------- +clean_response = escape_xml_characters(llm_output).strip() + +try: + root = ET.fromstring(clean_response) + for resp in root.findall("response"): + question = resp.find("question").text + answer = resp.find("answer").text + print(f"Question: {question}") + print(f"Answer: {answer}\n") +except ET.ParseError as e: + print("Could not parse XML response.") + print("Raw model output:\n", llm_output) ``` ## Getting Started Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up for an AssemblyAI account](https://www.assemblyai.com/app) and get your API key from your dashboard. 
-Find more details on the current LeMUR pricing in the AssemblyAI [pricing page](https://www.assemblyai.com/pricing#:~:text=LeMUR). +Find more details on the current LLM Gateway pricing in the AssemblyAI [pricing page](https://www.assemblyai.com/pricing). ## Step-by-Step Instructions -In this guide, we will prompt LeMUR with a structured Q&A format and generate an XML response. +In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response. + +First, let's import the necessary libraries and set our API key. + +```python +import requests +import time +import xml.etree.ElementTree as ET + +API_KEY = "YOUR_API_KEY" +``` + +Next, we'll use AssemblyAI to transcribe a file and save our transcript. + +```python +audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4" + +# ------------------------------- +# Step 1: Transcribe the audio +# ------------------------------- +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) + +transcript_id = transcript_request.json()["id"] + +# Poll for completion +while True: + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] + break + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") + else: + print(f"Transcription status: {status}") + time.sleep(3) +``` + +Construct a formatted string to structure the questions. This includes the question text, context, an answer format, any answer options, then returns the formatted string. 
+ +```python +# ------------------------------- +# Step 2: Build question helper functions +# ------------------------------- +def construct_question(question): + question_str = f"Question: {question['question']}" + + if question.get("context"): + question_str += f"\nContext: {question['context']}" + + # Default answer_format + if not question.get("answer_format"): + question["answer_format"] = "short sentence" + + question_str += f"\nAnswer Format: {question['answer_format']}" + + if question.get("answer_options"): + options_str = ", ".join(question["answer_options"]) + question_str += f"\nOptions: {options_str}" + + return question_str + "\n" + +def escape_xml_characters(xml_string): + return xml_string.replace("&", "&") +``` + +Define a list of questions. For each question, you can define additional `context` and specify either an `answer_format` or a list of `answer_options`. + +```python +# ------------------------------- +# Step 3: Define questions +# ------------------------------- +questions = [ + { + "question": "What are the top level KPIs for engineering?", + "context": "KPI stands for key performance indicator", + "answer_format": "short sentence", + }, + { + "question": "How many days has it been since the data team has gotten updated metrics?", + "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"], + }, + {"question": "What are the future plans for the project?"}, +] +``` + +Construct the formatted question string for all the questions and build the LLM prompt. -First, let's install the AssemblyAI SDK. +```python +question_str = '\n'.join(construct_question(q) for q in questions) +``` + +Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses. 
-```bash -pip install -U assemblyai +```python +# ------------------------------- +# Step 4: Build the LLM prompt +# ------------------------------- +prompt = f"""You are an expert at giving accurate answers to questions about texts. +No preamble. +Given the series of questions, answer the questions. +Each question may follow up with answer format, answer options, and context for each question. +It is critical that you follow the answer format and answer options for each question. +When context is provided with a question, refer to it when answering the question. +You are useful, true and concise, and write in perfect English. +Only the question is allowed between the tag. Do not include the answer format, options, or question context in your response. +Only text is allowed between the and tags. +XML tags are not allowed between the and tags. +End your response with a closing tag. +For each question-answer pair, format your response according to the template provided below: + +Template for response: + + + The question + Your answer + + + ... + + ... 
+ + +These are the questions: +{question_str} + +Transcript: +{transcript_text} +""" + +# ------------------------------- +# Step 5: Query LLM Gateway +# ------------------------------- +headers = {"authorization": API_KEY} + +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2000, + }, +) + +response_json = response.json() +llm_output = response_json["choices"][0]["message"]["content"] + +# ------------------------------- +# Step 6: Parse and print XML response +# ------------------------------- +clean_response = escape_xml_characters(llm_output).strip() + +try: + root = ET.fromstring(clean_response) + for resp in root.findall("response"): + question = resp.find("question").text + answer = resp.find("answer").text + print(f"Question: {question}") + print(f"Answer: {answer}\n") +except ET.ParseError as e: + print("Could not parse XML response.") + print("Raw model output:\n", llm_output) ``` -Then we'll import the SDK and set our AssemblyAI API key. +## Getting Started + +Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up for an AssemblyAI account](https://www.assemblyai.com/app) and get your API key from your dashboard. + +Find more details on the current LLM Gateway pricing in the AssemblyAI [pricing page](https://www.assemblyai.com/pricing). + +## Step-by-Step Instructions + +In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response. + +First, let's import the necessary libraries and set our API key. ```python -import assemblyai as aai +import requests +import time +import xml.etree.ElementTree as ET -aai.settings.api_key = "API_KEY_HERE" +API_KEY = "YOUR_API_KEY" ``` Next, we'll use AssemblyAI to transcribe a file and save our transcript. 
@@ -125,19 +361,47 @@ Next, we'll use AssemblyAI to transcribe a file and save our transcript. ```python audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4" -transcript = aai.Transcriber().transcribe(audio_url) +# ------------------------------- +# Step 1: Transcribe the audio +# ------------------------------- +transcript_request = requests.post( + "https://api.assemblyai.com/v2/transcript", + headers={"authorization": API_KEY, "content-type": "application/json"}, + json={"audio_url": audio_url}, +) + +transcript_id = transcript_request.json()["id"] + +# Poll for completion +while True: + polling_response = requests.get( + f"https://api.assemblyai.com/v2/transcript/{transcript_id}", + headers={"authorization": API_KEY}, + ) + status = polling_response.json()["status"] + + if status == "completed": + transcript_text = polling_response.json()["text"] + break + elif status == "error": + raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}") + else: + print(f"Transcription status: {status}") + time.sleep(3) ``` -Construct a formatted string to structure the questions from the `LemurQuestion` object. This includes the question text, optional context, an answer format (defaulting to "short sentence" if not provided), and any answer options, then returns the formatted string. +Construct a formatted string to structure the questions. This includes the question text, context, an answer format, and any answer options, then returns the formatted string. 
```python +# ------------------------------- +# Step 2: Build question helper functions +# ------------------------------- def construct_question(question): question_str = f"Question: {question.question}" if question.context: question_str += f"\nContext: {question.context}" - # Set default answer_format to "short sentence" if not provided if not question.answer_format: question.answer_format = "short sentence" @@ -148,46 +412,55 @@ def construct_question(question): question_str += f"\nOptions: {options_str}" return question_str + "\n" + +def escape_xml_characters(xml_string): + return xml_string.replace("&", "&") ``` -Define a list of `aai.LemurQuestion` objects. For each question, you can define additional `context` and specify either a `answer_format` or a list of `answer_options`. +Define a list of questions. For each question, you can define additional `context` and specify either an `answer_format` or a list of `answer_options`. ```python +# ------------------------------- +# Step 3: Define questions +# ------------------------------- questions = [ - aai.LemurQuestion( - question="What are the top level KPIs for engineering?", - context="KPI stands for key performance indicator", - answer_format="short sentence"), - aai.LemurQuestion( - question="How many days has it been since the data team has gotten updated metrics?", - answer_options=["1", "2", "3", "4", "5", "6", "7", "more than 7"]), - aai.LemurQuestion( - question="What are the future plans for the project?") + { + "question": "What are the top level KPIs for engineering?", + "context": "KPI stands for key performance indicator", + "answer_format": "short sentence", + }, + { + "question": "How many days has it been since the data team has gotten updated metrics?", + "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"], + }, + {"question": "What are the future plans for the project?"}, ] - ``` -Construct the formatted question string for all the Questions within the list of 
`aai.LemurQuestion` objects. +Construct the formatted question string for all the questions and build the LLM prompt. ```python question_str = '\n'.join(construct_question(q) for q in questions) ``` -Provide detailed instructions to prompt LeMUR to answer a series of questions. This also defines a structured XML template for the responses. +Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses. ```python +# ------------------------------- +# Step 4: Build the LLM prompt +# ------------------------------- prompt = f"""You are an expert at giving accurate answers to questions about texts. - No preamble. - Given the series of questions, answer the questions. - Each question may follow up with answer format, answer options, and context for each question. - It is critical that you follow the answer format and answer options for each question. - When context is provided with a question, refer to it when answering the question. - You are useful, true and concise, and write in perfect English. - Only the question is allowed between the tag. Do not include the answer format, options, or question context in your response. - Only text is allowed between the and tags. - XML tags are not allowed between the and tags. - End your response with a closing tag. - For each question-answer pair, format your response according to the template provided below: +No preamble. +Given the series of questions, answer the questions. +Each question may follow up with answer format, answer options, and context for each question. +It is critical that you follow the answer format and answer options for each question. +When context is provided with a question, refer to it when answering the question. +You are useful, true and concise, and write in perfect English. +Only the question is allowed between the tag. Do not include the answer format, options, or question context in your response. 
+Only text is allowed between the and tags. +XML tags are not allowed between the and tags. +End your response with a closing tag. +For each question-answer pair, format your response according to the template provided below: Template for response: @@ -203,37 +476,50 @@ Template for response: These are the questions: {question_str} + +Transcript: +{transcript_text} """ ``` -Prompt the LeMUR model using the Task Endpoint and return the response. +Prompt the LLM Gateway model and return the response. ```python -result = transcript.lemur.task( - prompt, - final_model=aai.LemurModel.claude3_5_sonnet +# ------------------------------- +# Step 5: Query LLM Gateway +# ------------------------------- +headers = {"authorization": API_KEY} + +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "claude-sonnet-4-5-20250929", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2000, + }, ) -response = result.response -print(response) +response_json = response.json() +llm_output = response_json["choices"][0]["message"]["content"] ``` Clean the XML output and print the question and answer pairs. 
```python -import xml.etree.ElementTree as ET - -def escape_xml_characters(xml_string): - return xml_string.replace('&', '&amp;') - -# Escape special XML characters and strip any leading/trailing whitespace -clean_response = escape_xml_characters(response).strip() - -root = ET.fromstring(clean_response) - -for response in root.findall('response'): - question = response.find('question').text - answer = response.find('answer').text - print(f"Question: {question}") - print(f"Answer: {answer}") +# ------------------------------- +# Step 6: Parse and print XML response +# ------------------------------- +clean_response = escape_xml_characters(llm_output).strip() + +try: + root = ET.fromstring(clean_response) + for resp in root.findall("response"): + question = resp.find("question").text + answer = resp.find("answer").text + print(f"Question: {question}") + print(f"Answer: {answer}\n") +except ET.ParseError as e: + print("Could not parse XML response.") + print("Raw model output:\n", llm_output) ```