diff --git a/cpp/include/tensorrt_llm/runtime/iTensor.h b/cpp/include/tensorrt_llm/runtime/iTensor.h index b3c8cc8c2a7..eb5c10eeb69 100644 --- a/cpp/include/tensorrt_llm/runtime/iTensor.h +++ b/cpp/include/tensorrt_llm/runtime/iTensor.h @@ -65,7 +65,6 @@ class ITensor : virtual public IBuffer //! //! \brief Returns the tensor n-th dimension. If n is negative, returns the (nbDims - n)th dimension. - //! TODO: replace with constexpr parameter when moving to C++20. //! template [[nodiscard]] DimType64 getDimension() const diff --git a/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md b/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md index 243b2609e80..3b264e1ed36 100644 --- a/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md +++ b/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md @@ -219,8 +219,6 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/jso Here is an example response, showing that the TRT-LLM server reasons and answers the questions. -TODO: Use Chat Compeletions API / Responses API as the example after the PR is merged. - ```json {"id":"chatcmpl-c5bf51b5cab94e10ba5da5266d12ee59","object":"chat.completion","created":1755815898,"model":"openai/gpt-oss-120b","choices":[{"index":0,"message":{"role":"assistant","content":"analysisThe user asks: \"Where is New York?\" Likely they want location info. Provide answer: New York State in northeastern US, New York City on the east coast, coordinates, etc. Provide context.assistantfinal**New York** can refer to two related places in the United States:\n\n| What it is | Where it is | Approx. coordinates | How to picture it |\n|------------|------------|--------------------|-------------------|\n| **New York State** | The northeastern corner of the United States, bordered by **Vermont, Massachusetts, Connecticut, New Jersey, Pennsylvania, and the Canadian provinces of Ontario and Quebec**. 
| 42.7° N, 75.5° W (roughly the state’s geographic centre) | A roughly rectangular state that stretches from the Atlantic Ocean in the southeast to the Adirondack Mountains and the Great Lakes region in the north. |\n| **New York City (NYC)** | The largest city in the state, located on the **southern tip of the state** where the **Hudson River meets the Atlantic Ocean**. It occupies five boroughs: Manhattan, Brooklyn, Queens, The Bronx, and Staten Island. | 40.7128° N, 74.0060° W | A dense, world‑famous metropolis that sits on a series of islands (Manhattan, Staten Island, parts of the Bronx) and the mainland (Brooklyn and Queens). |\n\n### Quick geographic context\n- **On a map of the United States:** New York State is in the **Northeast** region, just east of the Great Lakes and north of Pennsylvania. \n- **From Washington, D.C.:** Travel roughly **225 mi (360 km) northeast**. \n- **From Boston, MA:** Travel about **215 mi (350 km) southwest**. \n- **From Toronto, Canada:** Travel about **500 mi (800 km) southeast**.\n\n### Travel tips\n- **By air:** Major airports include **John F. Kennedy International (JFK)**, **LaGuardia (LGA)**, and **Newark Liberty International (EWR)** (the latter is actually in New Jersey but serves the NYC metro area). \n- **By train:** Amtrak’s **Northeast Corridor** runs from **Boston → New York City → Washington, D.C.** \n- **By car:** Interstates **I‑87** (north‑south) and **I‑90** (east‑west) are the primary highways crossing the state.\n\n### Fun fact\n- The name “**New York**” was given by the English in 1664, honoring the Duke of York (later King James II). 
The city’s original Dutch name was **“New Amsterdam.”**\n\nIf you need more specific directions (e.g., how to get to a particular neighborhood, landmark, or the state capital **Albany**), just let me know!","reasoning_content":null,"tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null,"mm_embedding_handle":null,"disaggregated_params":null,"avg_decoded_tokens_per_iter":1.0}],"usage":{"prompt_tokens":72,"total_tokens":705,"completion_tokens":633},"prompt_token_ids":null} ``` diff --git a/examples/llm-api/llm_sampling.py b/examples/llm-api/llm_sampling.py index dcaeb552b70..225cb8ea362 100644 --- a/examples/llm-api/llm_sampling.py +++ b/examples/llm-api/llm_sampling.py @@ -137,6 +137,24 @@ def demonstrate_multiple_sequences(prompt: str): print(f"Sequence {i+1}: {output.text}") +def demonstrate_beam_search(prompt: str): + """Demonstrates beam search (beam width 2); prints the first output only.""" + print("\n🎯 === BEAM SEARCH ===") + beam_width = 2 + llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", + max_beam_width=beam_width) + + sampling_params = SamplingParams( + max_tokens=50, + use_beam_search=True, + n=beam_width, + ) + + response = llm.generate(prompt, sampling_params) + print(f"Prompt: {prompt}") + print(f"Response: {response.outputs[0].text}") + + def demonstrate_with_logprobs(prompt: str): """Demonstrates generation with log probabilities.""" print("\n📊 === GENERATION WITH LOG PROBABILITIES ===") @@ -173,9 +191,8 @@ def run_all_demonstrations(model_path: Optional[str] = None): demonstrate_top_k_sampling(demo_prompt) demonstrate_top_p_sampling(demo_prompt) demonstrate_combined_sampling(demo_prompt) - # TODO[Superjomn]: enable them once pytorch backend supports - # demonstrate_multiple_sequences(llm, demo_prompt) - # demonstrate_beam_search(demo_prompt) + demonstrate_multiple_sequences(demo_prompt) + demonstrate_beam_search(demo_prompt) demonstrate_with_logprobs(demo_prompt) print("\n🎉 All sampling demonstrations completed!") @@ -219,6 +236,8 @@ def main(model: Optional[str], 
demo: str, prompt: Optional[str]): demonstrate_combined_sampling(demo_prompt) elif demo == "multiple": demonstrate_multiple_sequences(demo_prompt) + elif demo == "beam": + demonstrate_beam_search(demo_prompt) elif demo == "logprobs": demonstrate_with_logprobs(demo_prompt) elif demo == "all": diff --git a/tensorrt_llm/functional.py b/tensorrt_llm/functional.py index 282febd262e..19e31d04cea 100755 --- a/tensorrt_llm/functional.py +++ b/tensorrt_llm/functional.py @@ -3279,8 +3279,6 @@ def identity(input: Tensor) -> Tensor: ''' Add an identity operation. - TODO: Document why it can be done using a plugin!!! - Parameters: input : Tensor The input tensor.