From 857f33dfef8d011f5cabba6245945e56b6e85abb Mon Sep 17 00:00:00 2001 From: Vili Tajnic Date: Mon, 24 Nov 2025 14:48:04 +0100 Subject: [PATCH] Fix testbed evaluation bugs - Disable Giskard HTML report generation causing TypeError with mixed data types - Update evaluation report to show 'Auto-discovered' for vector stores - Fix file upload variable shadowing bug in testbed API endpoint - Improve error handling for missing vector search settings fields --- src/client/content/testbed.py | 10 +++++++--- src/server/api/utils/testbed.py | 2 +- src/server/api/v1/testbed.py | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/client/content/testbed.py b/src/client/content/testbed.py index aca1f317..39b86fde 100644 --- a/src/client/content/testbed.py +++ b/src/client/content/testbed.py @@ -86,13 +86,17 @@ def create_gauge(value): st.dataframe(ll_settings_reversed, hide_index=True) if report["settings"]["testbed"]["judge_model"]: st.markdown(f"**Judge Model**: {report['settings']['testbed']['judge_model']}") - if report["settings"]["vector_search"]["enabled"]: + if report["settings"]["vector_search"].get("discovery"): st.subheader("Vector Search Settings") st.markdown(f"""**Database**: {report["settings"]["database"]["alias"]}; - **Vector Store**: {report["settings"]["vector_search"]["vector_store"]} + **Vector Store**: Auto-discovered """) embed_settings = pd.DataFrame(report["settings"]["vector_search"], index=[0]) - embed_settings.drop(["vector_store", "alias", "enabled", "grading"], axis=1, inplace=True) + # Drop fields that shouldn't be displayed in the table (only drop if they exist) + fields_to_drop = ["vector_store", "alias"] + existing_fields = [f for f in fields_to_drop if f in embed_settings.columns] + if existing_fields: + embed_settings.drop(existing_fields, axis=1, inplace=True) if report["settings"]["vector_search"]["search_type"] == "Similarity": embed_settings.drop(["score_threshold", "fetch_k", "lambda_mult"], axis=1, inplace=True) st.dataframe(embed_settings, hide_index=True) diff --git a/src/server/api/utils/testbed.py b/src/server/api/utils/testbed.py index 5968ffd7..988ab911 100644 --- a/src/server/api/utils/testbed.py +++ b/src/server/api/utils/testbed.py @@ -304,7 +304,7 @@ def clean(orig_html): results = utils_databases.execute_sql(db_conn, sql, binds) report = pickle.loads(results[0]["RAG_REPORT"]) full_report = report.to_pandas() - html_report = report.to_html() + # html_report = report.to_html() # Disabled: causes TypeError with mixed data types by_topic = report.correctness_by_topic() failures = report.failures diff --git a/src/server/api/v1/testbed.py b/src/server/api/v1/testbed.py index 3d9d1f6a..23d66309 100644 --- a/src/server/api/v1/testbed.py +++ b/src/server/api/v1/testbed.py @@ -162,8 +162,8 @@ async def testbed_generate_qa( # Read and save file content filename = temp_directory / file.filename logger.info("Writing Q&A File to: %s", filename) - with open(filename, "wb") as file: - file.write(await file.read()) + with open(filename, "wb") as f: + f.write(await file.read()) # Process file for knowledge base and save test set test_set = utils_testbed.build_knowledge_base(