diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index 2e462b0f6e..733084e8a4 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -73,7 +73,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
@@ -396,7 +396,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             chat_template=chat_request.chat_template if chat_request.chat_template else None,
-            model=chat_request.model if chat_request.model else None,
+            model=chat_request.model if chat_request.model else LLM_MODEL,
         )
         retriever_parameters = RetrieverParms(
             search_type=chat_request.search_type if chat_request.search_type else "similarity",
diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py
index af9afdf715..f95ec94e09 100644
--- a/CodeGen/codegen.py
+++ b/CodeGen/codegen.py
@@ -76,7 +76,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL_ID
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["query"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
@@ -195,6 +195,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             index_name=chat_request.index_name,
+            model=chat_request.model if chat_request.model else LLM_MODEL_ID,
         )

         # Initialize the initial inputs with the generated prompt