From 6c27094fc658f42cb6745afb220d842c633ba04f Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:43:58 +0100 Subject: [PATCH 01/53] add ollama to docker-compose --- docker-compose_ollama.yml | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 docker-compose_ollama.yml diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml new file mode 100644 index 000000000..020b12c96 --- /dev/null +++ b/docker-compose_ollama.yml @@ -0,0 +1,54 @@ +# Use this file as an override to add a local Ollama instance to your 4CAT stack. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +# +# Once running, configure 4CAT via the Control Panel → Settings → LLM: +# LLM Provider Type : ollama +# LLM Server URL : http://ollama:11434 +# +# GPU support (NVIDIA): +# Uncomment the `deploy.resources` block in the ollama service below and +# ensure the NVIDIA Container Toolkit is installed on your host. +# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +# +# GPU support (Apple Silicon / AMD): +# Pass the appropriate device through your host's Docker settings instead. +# Ollama will automatically detect the GPU when it is available inside the container. + +services: + ollama: + image: ollama/ollama:latest + container_name: 4cat_ollama + restart: unless-stopped + volumes: + - 4cat_ollama:/root/.ollama + # Expose the Ollama API on the host for optional external access or + # management with the Ollama CLI. Remove this block if you want to keep + # Ollama accessible only within the Docker network. 
+ ports: + - "127.0.0.1:11434:11434" + healthcheck: + test: ["CMD", "ollama", "ls"] + interval: 10s + timeout: 5s + retries: 5 + # --- NVIDIA GPU support (uncomment to enable) --- + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # Make the 4CAT backend wait for Ollama to be healthy before starting. + # This prevents initial model-refresh failures on first boot. + backend: + depends_on: + ollama: + condition: service_healthy + +volumes: + 4cat_ollama: + name: 4cat_ollama_data From 8a8427cd5a1bfc686189498b29ccba5d7c9a59d6 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:44:39 +0100 Subject: [PATCH 02/53] give me a proper worker who can do neat stuff. --- backend/workers/ollama_manager.py | 171 ++++++++++++++++++++++++++++++ backend/workers/refresh_items.py | 55 +--------- 2 files changed, 173 insertions(+), 53 deletions(-) create mode 100644 backend/workers/ollama_manager.py diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py new file mode 100644 index 000000000..d990b8b14 --- /dev/null +++ b/backend/workers/ollama_manager.py @@ -0,0 +1,171 @@ +""" +Manage Ollama LLM models +""" +import json +import time + +import requests + +from backend.lib.worker import BasicWorker + + +class OllamaManager(BasicWorker): + """ + Manage Ollama LLM models + + Periodically refreshes the list of available models from an Ollama server. + Can also pull or delete models on demand when queued with a specific task. 
+ + Job details: + - task: "refresh" (default), "pull", or "delete" + + Job remote_id: + - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-ollama" + max_workers = 1 + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-ollama-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + model_name = self.job.data["remote_id"] + + if task == "refresh": + self.refresh_models() + elif task == "pull": + success = self.pull_model(model_name) + if success: + self.refresh_models() + elif task == "delete": + success = self.delete_model(model_name) + if success: + self.refresh_models() + else: + self.log.warning(f"OllamaManager: unknown task '{task}'") + + self.job.finish() + + def _get_llm_headers(self): + """Build request headers for LLM server auth.""" + headers = {"Content-Type": "application/json"} + llm_api_key = self.config.get("llm.api_key", "") + llm_auth_type = self.config.get("llm.auth_type", "") + if llm_api_key and llm_auth_type: + headers[llm_auth_type] = llm_api_key + return headers + + def refresh_models(self): + """ + Query the Ollama server for available models and update llm.available_models. 
+ """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + return + + headers = self._get_llm_headers() + available_models = {} + + try: + response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) + if response.status_code != 200: + self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") + return + + for model in response.json().get("models", []): + model_id = model["name"] + try: + meta = requests.post( + f"{llm_server}/api/show", + headers=headers, + json={"model": model_id}, + timeout=10 + ).json() + display_name = ( + f"{meta['model_info']['general.basename']}" + f" ({meta['details']['parameter_size']} parameters)" + ) + except (requests.RequestException, json.JSONDecodeError, KeyError) as e: + self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") + display_name = model_id + + available_models[model_id] = { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local" + } + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") + + def pull_model(self, model_name): + """ + Pull a model from the Ollama registry. + + :param str model_name: Model name (e.g. 
"llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot pull model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + # stream=False waits for the pull to complete before returning + response = requests.post( + f"{llm_server}/api/pull", + headers=headers, + json={"model": model_name, "stream": False}, + timeout=600 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}") + return False + + def delete_model(self, model_name): + """ + Delete a model from the Ollama server. + + :param str model_name: Model name (e.g. 
"llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot delete model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + response = requests.delete( + f"{llm_server}/api/delete", + headers=headers, + json={"model": model_name}, + timeout=30 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}") + return False diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 28eb73637..96a7da6b0 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -1,17 +1,14 @@ """ Refresh items """ -import json - -import requests - from backend.lib.worker import BasicWorker class ItemUpdater(BasicWorker): """ Refresh 4CAT items - Refreshes settings that are dependent on external factors + Refreshes settings that are dependent on external factors. + LLM model refreshing is handled by the OllamaManager worker. """ type = "refresh-items" max_workers = 1 @@ -21,58 +18,10 @@ def ensure_job(cls, config=None): """ Ensure that the refresher is always running - This is used to ensure that the refresher is always running, and if it is - not, it will be started by the WorkerManager. 
- :return: Job parameters for the worker """ return {"remote_id": "refresh-items", "interval": 60} def work(self): - # Refresh items - self.refresh_settings() - self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info']['general.basename']} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file From 89824e2d475e2ac043c2f198efb31db54bcaa743 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 
2026 15:53:21 +0100 Subject: [PATCH 03/53] ruff you mean --- backend/workers/ollama_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index d990b8b14..64808f52d 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -2,8 +2,6 @@ Manage Ollama LLM models """ import json -import time - import requests from backend.lib.worker import BasicWorker From e7aa9af35831ef719b5ae45939b94dba5c28fc17 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:36:00 +0100 Subject: [PATCH 04/53] add docker setup if ollama present --- docker/docker_setup.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docker/docker_setup.py b/docker/docker_setup.py index 450684602..aea641c12 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -207,6 +207,35 @@ def _format_host(host: str) -> str: f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\"" ) + # If an Ollama container is available on the Docker network, configure 4CAT to use it. + ollama_url = 'http://ollama:11434' + try: + import requests + try: + resp = requests.get(f"{ollama_url}/api/tags", timeout=2) + if resp.status_code == 200: + current_llm_server = config.get("llm.server") + if current_llm_server == ollama_url: + print("Ollama server already configured in 4CAT settings.") + elif current_llm_server and current_llm_server != ollama_url: + # Previously configured LLM server is different; log a warning but do not overwrite user settings + print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. 
To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.") + else: + # set basic LLM settings so the initial admin user does not need to + # configure them manually for local development environments that + # include the Ollama sidecar. + config.set('llm.provider_type', 'ollama') + config.set('llm.server', ollama_url) + config.set('llm.access', True) + config.db.commit() + print('Detected Ollama on Docker network; configured LLM settings to use it.') + except requests.RequestException: + # Ollama not available; do nothing + pass + except Exception: + # requests other error; skip automatic Ollama configuration + pass + print(f"\nStarting app\n" f"4CAT is accessible at:\n" f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n") From 74e01b6263425f161e109d4a55e71bcacd659ee2 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:36:33 +0100 Subject: [PATCH 05/53] a useful frontend setting panel --- common/lib/config_definition.py | 10 +- webtool/__init__.py | 2 + webtool/templates/controlpanel/layout.html | 2 + .../templates/controlpanel/llm-server.html | 147 ++++++++++++++++++ webtool/views/views_llm.py | 104 +++++++++++++ 5 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 webtool/templates/controlpanel/llm-server.html create mode 100644 webtool/views/views_llm.py diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index aef363e04..daf20020d 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -621,7 +621,15 @@ "type": UserInput.OPTION_TEXT_JSON, "default": {}, "help": "Available LLM models", - "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.", + "tooltip": "A JSON dictionary of available LLM models on the server. 
Refreshed daily by the OllamaManager worker.", + "indirect": True, + "global": True + }, + "llm.enabled_models": { + "type": UserInput.OPTION_TEXT_JSON, + "default": [], + "help": "Enabled LLM models", + "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.", "indirect": True, "global": True }, diff --git a/webtool/__init__.py b/webtool/__init__.py index 54ac2072c..e807b7ae0 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -171,6 +171,7 @@ def time_this(func): import webtool.views.views_restart # noqa: E402 import webtool.views.views_admin # noqa: E402 import webtool.views.views_extensions # noqa: E402 + import webtool.views.views_llm # noqa: E402 import webtool.views.views_user # noqa: E402 import webtool.views.views_dataset # noqa: E402 import webtool.views.views_misc # noqa: E402 @@ -181,6 +182,7 @@ def time_this(func): app.register_blueprint(webtool.views.views_restart.component) app.register_blueprint(webtool.views.views_admin.component) app.register_blueprint(webtool.views.views_extensions.component) + app.register_blueprint(webtool.views.views_llm.component) app.register_blueprint(webtool.views.views_user.component) app.register_blueprint(webtool.views.views_dataset.component) app.register_blueprint(webtool.views.views_misc.component) diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index b0b0bc0b4..98d87220a 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -18,6 +18,8 @@ Jobs{% endif %} {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} + {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} + LLM Server{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html 
b/webtool/templates/controlpanel/llm-server.html new file mode 100644 index 000000000..d8a7dbd53 --- /dev/null +++ b/webtool/templates/controlpanel/llm-server.html @@ -0,0 +1,147 @@ +{% extends "controlpanel/layout.html" %} + +{% block title %}LLM Server{% endblock %} +{% block body_class %}plain-page admin {{ body_class }}{% endblock %} +{% block subbreadcrumbs %}{% set navigation.sub = "llm" %}{% endblock %} + +{% block body %} +
+
+

LLM Server

+ + {% if flashes %} +
+ {% for notice in flashes %} +

{{ notice|safe }}

+ {% endfor %} +
+ {% endif %} + + {# Server status #} +
+ + + + + + + + + + + + + +
SettingValue
Server URL{% if llm_server %}{{ llm_server }}{% else %}Not configured{% endif %}
Status + {% if server_status == "online" %} + Online + {% elif server_status == "not configured" %} + Not configured + {% else %} + {{ server_status }} + {% endif %} +
+
+ + {# Available models #} +

Available Models +
+ + +
+

+ +
+ + + + + + + + + + + + + + {% if available_models %} + {% for model_id, model in available_models.items() %} + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} +
ModelDisplay nameStatusActions
+ {{ model_id }} + + {% if model.model_card %} + {{ model.name }} + {% else %} + {{ model.name }} + {% endif %} + + {% if model_id in enabled_models %} + Enabled + {% else %} + Disabled + {% endif %} + + {% if model_id in enabled_models %} +
+ + + +
+ {% else %} +
+ + + +
+ {% endif %} +
+ + + +
+
+ {% if llm_server %} + No models found. Use the Refresh button to fetch available models, or pull a new model below. + {% else %} + Configure the LLM server URL in Settings first. + {% endif %} +
+
+
+ + {# Pull a new model #} + {% if llm_server %} +
+

Pull Model

+

Enter an Ollama model name (e.g. llama3:8b) to pull it from the + Ollama library. + Pulling large models may take several minutes; the job runs in the background.

+
+ +
+ + +
+
+ +
+
+
+ {% endif %} +
+{% endblock %} diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py new file mode 100644 index 000000000..41f9df472 --- /dev/null +++ b/webtool/views/views_llm.py @@ -0,0 +1,104 @@ +""" +4CAT views for LLM server management +""" +import time + +import requests + +from flask import Blueprint, render_template, flash, get_flashed_messages, redirect, url_for, request, g +from flask_login import login_required + +from webtool.lib.helpers import setting_required, error + +component = Blueprint("llm", __name__) + + +@component.route("/admin/llm/", methods=["GET", "POST"]) +@login_required +@setting_required("privileges.admin.can_manage_settings") +def llm_panel(): + """ + LLM Server management panel + + Shows server status, available models, and controls to pull/delete/refresh + models. Pull, delete, and refresh operations are queued as OllamaManager + jobs rather than run synchronously. + """ + if not g.config.get("llm.access"): + return error(403, message="LLM access is not enabled on this server.") + + if request.method == "POST": + action = request.form.get("action", "").strip() + + if action == "refresh": + # Queue a one-time manual refresh job; use a timestamp-based remote_id + # so it is always accepted even if a periodic job already exists. 
+ g.queue.add_job("manage-ollama", details={"task": "refresh"}, + remote_id=f"manage-ollama-manual-{int(time.time())}") + flash("Model refresh job queued.") + + elif action == "pull": + model_name = request.form.get("model_name", "").strip() + if model_name: + g.queue.add_job("manage-ollama", details={"task": "pull"}, remote_id=model_name) + flash(f"Pull job queued for model '{model_name}'.") + else: + flash("Please provide a model name to pull.") + + elif action == "delete": + model_name = request.form.get("model_name", "").strip() + if model_name: + g.queue.add_job("manage-ollama", details={"task": "delete"}, remote_id=model_name) + flash(f"Delete job queued for model '{model_name}'.") + + elif action == "enable": + model_name = request.form.get("model_name", "").strip() + if model_name: + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + if model_name not in enabled_models: + enabled_models.append(model_name) + g.config.set("llm.enabled_models", enabled_models) + flash(f"Model '{model_name}' enabled.") + + elif action == "disable": + model_name = request.form.get("model_name", "").strip() + if model_name: + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + if model_name in enabled_models: + enabled_models.remove(model_name) + g.config.set("llm.enabled_models", enabled_models) + flash(f"Model '{model_name}' disabled.") + + return redirect(url_for("llm.llm_panel")) + + # --- GET: render panel --- + + llm_server = g.config.get("llm.server", "") + server_status = "not configured" + + if llm_server: + headers = {"Content-Type": "application/json"} + llm_api_key = g.config.get("llm.api_key", "") + llm_auth_type = g.config.get("llm.auth_type", "") + if llm_api_key and llm_auth_type: + headers[llm_auth_type] = llm_api_key + + try: + resp = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=5) + server_status = "online" if resp.status_code == 200 else f"error (HTTP {resp.status_code})" + except requests.Timeout: + 
server_status = "unreachable (timeout)" + except requests.RequestException as e: + server_status = f"unreachable ({e})" + + available_models = g.config.get("llm.available_models", {}) or {} + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + + return render_template( + "controlpanel/llm-server.html", + flashes=get_flashed_messages(), + llm_server=llm_server, + server_status=server_status, + available_models=available_models, + enabled_models=enabled_models, + ) From baec03a75257a8780796add212a9ce3826671dd6 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:50:36 +0100 Subject: [PATCH 06/53] only show enabled models --- processors/machine_learning/llm_prompter.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 57d8ee496..959ff8f88 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -66,9 +66,15 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: def get_options(cls, parent_dataset=None, config=None) -> dict: # Check if 4CAT wide LLM server is available if config.get("llm.access", False) and config.get("llm.server", ""): - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()} - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" + # Check some models enabled + shared_llm_enabled_models = config.get("llm.enabled_models", []) + shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items() if model in shared_llm_enabled_models} + if not shared_llm_models: + shared_llm_name = False + shared_llm_default = "" + else: + shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") + shared_llm_default = 
list(shared_llm_models.keys())[0] if shared_llm_models else "" else: shared_llm_name = False shared_llm_default = "" From 36fe0ed734184e1012033cb8469c2d96a9ca14a3 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:51:50 +0100 Subject: [PATCH 07/53] update docker readme so people can use ollama --- docker/README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/docker/README.md b/docker/README.md index 00f0862fc..de5dd318b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -42,3 +42,63 @@ https://github.com/docker/buildx/issues/426 https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0 4. More errors coming soon! (No doubt) + +--- + +## Running a local Ollama instance alongside 4CAT + +4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors. +A Docker Compose override file (`docker-compose_ollama.yml`) is included to add +Ollama as a sidecar service so you do not need to run it separately on the host. + +### Quick start + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +This starts the standard 4CAT stack plus an `ollama` container that is only +accessible within the Docker network (and optionally on `localhost:11434` on +the host via the exposed port). + +### Configuring 4CAT to use Ollama + +1. Log in as admin and open **Control Panel → Settings**. +2. Set the following LLM fields: + + | Setting | Value | + |---|---| + | LLM Provider Type | `ollama` | + | LLM Server URL | `http://ollama:11434` | + | LLM Access | enabled | + +3. Save settings. +4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled). +5. Use the **Refresh** button to load available models, then **Pull** a model + (e.g. `llama3.2:3b`) to download it from the Ollama library. +6. Enable the models you want to make available to users. 
+ +### GPU support (NVIDIA) + +Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and +ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +is installed on your host. Then restart the stack with the override: + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +### Persisting models + +Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume. +They survive container restarts and re-creations unless you explicitly remove +the volume (`docker volume rm 4cat_ollama_data`). + +### Using an external Ollama server + +If you already run Ollama on the host or elsewhere, skip the override file and +point 4CAT directly at that server: + +- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. +- **Remote server**: use the server's reachable URL and configure any required + API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. 
From eb4b49a98126002100be6d16a0f879572bb1469f Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:56:32 +0100 Subject: [PATCH 08/53] Cleanup: stale enabled models, refresh_items scheduling, README auto-config docs (#581) * Initial plan * Fix stale enabled models, disable refresh_items scheduling, update README docs Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --- backend/workers/ollama_manager.py | 8 ++++++++ backend/workers/refresh_items.py | 15 +++++++-------- docker/README.md | 15 ++++++++++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 64808f52d..3c6950b59 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -104,6 +104,14 @@ def refresh_models(self): self.config.set("llm.available_models", available_models) self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + except requests.RequestException as e: self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 96a7da6b0..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -13,15 +13,14 @@ class 
ItemUpdater(BasicWorker): type = "refresh-items" max_workers = 1 - @classmethod - def ensure_job(cls, config=None): - """ - Ensure that the refresher is always running - - :return: Job parameters for the worker - """ - return {"remote_id": "refresh-items", "interval": 60} + # ensure_job is intentionally disabled: this worker currently does nothing + # and would only create unnecessary job queue churn. Re-enable when work() + # has actual tasks to perform. + # @classmethod + # def ensure_job(cls, config=None): + # return {"remote_id": "refresh-items", "interval": 60} def work(self): + # Placeholder – no tasks implemented yet. self.job.finish() \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index de5dd318b..31843b2ce 100644 --- a/docker/README.md +++ b/docker/README.md @@ -63,8 +63,17 @@ the host via the exposed port). ### Configuring 4CAT to use Ollama +#### Automatic configuration (fresh Docker install with sidecar) + +When you start 4CAT for the first time using the Ollama override file, the +`docker_setup.py` initialisation script automatically detects the `ollama` +sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access** +for you. You can skip to step 2 below. + +#### Manual configuration (or to verify/change settings) + 1. Log in as admin and open **Control Panel → Settings**. -2. Set the following LLM fields: +2. Confirm or set the following LLM fields: | Setting | Value | |---|---| @@ -102,3 +111,7 @@ point 4CAT directly at that server: - **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. - **Remote server**: use the server's reachable URL and configure any required API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. + +In both cases, configure the LLM settings manually via **Control Panel → Settings** +(see *Manual configuration* above), using the appropriate server URL instead of +`http://ollama:11434`. 
From 26f33f57d85ce0b7f468cfcff0e7bedb407a63ca Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 9 Apr 2026 17:57:31 +0200 Subject: [PATCH 09/53] ollama_manager: get additional info from ollama including capabilities --- backend/workers/ollama_manager.py | 112 ++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 38 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 3c6950b59..9cfa26d73 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -74,46 +74,82 @@ def refresh_models(self): try: response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code != 200: - self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") - return - - for model in response.json().get("models", []): - model_id = model["name"] - try: - meta = requests.post( - f"{llm_server}/api/show", - headers=headers, - json={"model": model_id}, - timeout=10 - ).json() - display_name = ( - f"{meta['model_info']['general.basename']}" - f" ({meta['details']['parameter_size']} parameters)" - ) - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") - display_name = model_id - - available_models[model_id] = { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local" - } - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: 
removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - except requests.RequestException as e: self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") + return + + if response.status_code != 200: + self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") + return + + for model in response.json().get("models", []): + model_id = model["name"] + try: + meta = self.get_model_metadata(model_id) + except (requests.RequestException, json.JSONDecodeError, KeyError) as e: + self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") + meta = None + if meta: + display_name = ( + f"{meta['model_info']['general.basename']}" + f" ({meta['details']['parameter_size']} parameters)" + ) + success = True + else: + display_name = model_id + meta = {} + success = False + + available_models[model_id] = { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local", + "metadata_success": success, + "capabilities": meta.get("capabilities", []), + "details": meta.get("details", {}), + "modified_at": meta.get("modified_at", None), + } + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + + + def get_model_metadata(self, model_name): + """ + Get metadata for a specific model from the Ollama server. 
+ + :param str model_name: Model name (e.g. "llama3:8b") + :return dict or None: Metadata dict on success, None on failure + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot get model metadata - no LLM server configured") + return None + + headers = self._get_llm_headers() + try: + response = requests.post( + f"{llm_server}/api/show", + headers=headers, + json={"model": model_name}, + timeout=10 + ) + if response.status_code == 200: + return response.json() + else: + self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - server returned {response.status_code}") + return None + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - request error: {e}") + return None def pull_model(self, model_name): """ From f2501b9c60e497ce77da04948e6718a229045f61 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 12:31:13 +0200 Subject: [PATCH 10/53] ollama_manager: display names / ollama get your api together --- backend/workers/ollama_manager.py | 127 +++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 9cfa26d73..c1f2b4572 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -3,6 +3,7 @@ """ import json import requests +import re from backend.lib.worker import BasicWorker @@ -61,6 +62,122 @@ def _get_llm_headers(self): headers[llm_auth_type] = llm_api_key return headers + @staticmethod + def _format_model_display_name(model_id, meta): + """ + Build a friendly display name for a model using metadata where possible. + Falls back to a sensible string derived from `model_id`. + + Dear Ollama: if you add a "display_name" field to your /api/show response, I will use it and not complain about missing metadata fields. Pretty please? 
:) + Because this is ridiculous. + """ + model_info = meta.get("model_info", {}) if meta else {} + details = meta.get("details", {}) if meta else {} + + # Basename preference: explicit metadata, else model id prefix + basename = None + for key in ("general.basename", "general.base_model.0.name"): + val = model_info.get(key) + if val: + basename = str(val).strip() + break + if not basename: + basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id + + # Helpers for parsing and formatting parameter counts + def _parse_param_count(val): + if val is None: + return None + if isinstance(val, int): + return val + if isinstance(val, float): + return int(val) + s = str(val).strip() + if not s: + return None + s = s.replace(",", "") + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) + if m: + num = float(m.group(1)) + suf = m.group(2).upper() + return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) + # try float / scientific + try: + return int(float(s)) + except Exception: + return None + + def _humanize(n): + if n is None: + return None + n = int(n) + if n >= 1_000_000_000: + x = n / 1_000_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith('.0'): + s = s[:-2] + return f"{s}B" + if n >= 1_000_000: + x = n / 1_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith('.0'): + s = s[:-2] + return f"{s}M" + return f"{n:,}" + + # Determine param count from prioritized fields + param_candidate = None + for key in ("parameter_size", "parameter_count"): + if key in details: + param_candidate = details.get(key) + break + if param_candidate is None: + param_candidate = model_info.get("general.parameter_count") + param_int = _parse_param_count(param_candidate) + human = _humanize(param_int) + + # Normalize size label if present + size_label = model_info.get("general.size_label") + size_label_norm = str(size_label).strip() if size_label else None + + # Extract tag (suffix after ':') if present 
+ tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None + + # Decide suffix using tag-aware rules + suffix = None + if tag: + t = tag + tl = t.lower() + # Special handling for common tags that often indicate size or version + if tl in ("latest", "stable", "current"): + suffix = f"{t} · {human}" if human else t + # If tag looks like a size (e.g. "1b", "1.7B"), can use as suffix + else: + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", t) + if m: + # tag is a size like '1b' or '1.7B' + num = m.group(1) + suf = m.group(2).upper() + tag_size = f"{num}{suf}" + # prefer explicit size_label if it matches + if size_label_norm and size_label_norm.upper() == tag_size.upper(): + suffix = size_label_norm + else: + suffix = tag_size + else: + suffix = f"{t} · {human}" if human else t + else: + # No tag, so just use size if available + if size_label_norm: + suffix = size_label_norm + elif human: + suffix = human + else: + # Nothing useful to show; fallback to model id + return model_id + + return f"{basename} ({suffix})" + def refresh_models(self): """ Query the Ollama server for available models and update llm.available_models. 
@@ -90,10 +207,11 @@ def refresh_models(self): self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") meta = None if meta: - display_name = ( - f"{meta['model_info']['general.basename']}" - f" ({meta['details']['parameter_size']} parameters)" - ) + try: + display_name = self._format_model_display_name(model_id, meta) + except Exception as e: + self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") + display_name = model_id success = True else: display_name = model_id @@ -105,6 +223,7 @@ def refresh_models(self): "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", "provider": "local", "metadata_success": success, + "model_info": meta.get("model_info", {}), "capabilities": meta.get("capabilities", []), "details": meta.get("details", {}), "modified_at": meta.get("modified_at", None), From c72d0436b3836160a8a2c02249743d58dfc8be5e Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 14:56:29 +0200 Subject: [PATCH 11/53] Create OllamaClient to collect model info --- backend/workers/ollama_manager.py | 452 +++++++++--------------------- common/lib/ollama_client.py | 296 +++++++++++++++++++ 2 files changed, 423 insertions(+), 325 deletions(-) create mode 100644 common/lib/ollama_client.py diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index c1f2b4572..09dafea3a 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -1,332 +1,134 @@ """ Manage Ollama LLM models """ -import json -import requests -import re - from backend.lib.worker import BasicWorker +from common.lib.ollama_client import OllamaClient class OllamaManager(BasicWorker): - """ - Manage Ollama LLM models - - Periodically refreshes the list of available models from an Ollama server. - Can also pull or delete models on demand when queued with a specific task. 
- - Job details: - - task: "refresh" (default), "pull", or "delete" - - Job remote_id: - - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) - - For pull/delete: the model name to pull or delete - """ - type = "manage-ollama" - max_workers = 1 - - @classmethod - def ensure_job(cls, config=None): - """ - Ensure the daily refresh job is always scheduled - - :return: Job parameters for the worker - """ - return {"remote_id": "manage-ollama-refresh", "interval": 86400} - - def work(self): - task = self.job.details.get("task", "refresh") if self.job.details else "refresh" - model_name = self.job.data["remote_id"] - - if task == "refresh": - self.refresh_models() - elif task == "pull": - success = self.pull_model(model_name) - if success: - self.refresh_models() - elif task == "delete": - success = self.delete_model(model_name) - if success: - self.refresh_models() - else: - self.log.warning(f"OllamaManager: unknown task '{task}'") - - self.job.finish() - - def _get_llm_headers(self): - """Build request headers for LLM server auth.""" - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - return headers - - @staticmethod - def _format_model_display_name(model_id, meta): - """ - Build a friendly display name for a model using metadata where possible. - Falls back to a sensible string derived from `model_id`. - - Dear Ollama: if you add a "display_name" field to your /api/show response, I will use it and not complain about missing metadata fields. Pretty please? :) - Because this is ridiculous. 
- """ - model_info = meta.get("model_info", {}) if meta else {} - details = meta.get("details", {}) if meta else {} - - # Basename preference: explicit metadata, else model id prefix - basename = None - for key in ("general.basename", "general.base_model.0.name"): - val = model_info.get(key) - if val: - basename = str(val).strip() - break - if not basename: - basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id - - # Helpers for parsing and formatting parameter counts - def _parse_param_count(val): - if val is None: - return None - if isinstance(val, int): - return val - if isinstance(val, float): - return int(val) - s = str(val).strip() - if not s: - return None - s = s.replace(",", "") - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) - if m: - num = float(m.group(1)) - suf = m.group(2).upper() - return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) - # try float / scientific - try: - return int(float(s)) - except Exception: - return None - - def _humanize(n): - if n is None: - return None - n = int(n) - if n >= 1_000_000_000: - x = n / 1_000_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith('.0'): - s = s[:-2] - return f"{s}B" - if n >= 1_000_000: - x = n / 1_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith('.0'): - s = s[:-2] - return f"{s}M" - return f"{n:,}" - - # Determine param count from prioritized fields - param_candidate = None - for key in ("parameter_size", "parameter_count"): - if key in details: - param_candidate = details.get(key) - break - if param_candidate is None: - param_candidate = model_info.get("general.parameter_count") - param_int = _parse_param_count(param_candidate) - human = _humanize(param_int) - - # Normalize size label if present - size_label = model_info.get("general.size_label") - size_label_norm = str(size_label).strip() if size_label else None - - # Extract tag (suffix after ':') if present - tag = model_id.split(":", 
1)[1].strip() if ":" in model_id else None - - # Decide suffix using tag-aware rules - suffix = None - if tag: - t = tag - tl = t.lower() - # Special handling for common tags that often indicate size or version - if tl in ("latest", "stable", "current"): - suffix = f"{t} · {human}" if human else t - # If tag looks like a size (e.g. "1b", "1.7B"), can use as suffix - else: - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", t) - if m: - # tag is a size like '1b' or '1.7B' - num = m.group(1) - suf = m.group(2).upper() - tag_size = f"{num}{suf}" - # prefer explicit size_label if it matches - if size_label_norm and size_label_norm.upper() == tag_size.upper(): - suffix = size_label_norm - else: - suffix = tag_size - else: - suffix = f"{t} · {human}" if human else t - else: - # No tag, so just use size if available - if size_label_norm: - suffix = size_label_norm - elif human: - suffix = human - else: - # Nothing useful to show; fallback to model id - return model_id - - return f"{basename} ({suffix})" - - def refresh_models(self): - """ - Query the Ollama server for available models and update llm.available_models. 
- """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - return - - headers = self._get_llm_headers() - available_models = {} - - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") - return - - if response.status_code != 200: - self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") - return - - for model in response.json().get("models", []): - model_id = model["name"] - try: - meta = self.get_model_metadata(model_id) - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") - meta = None - if meta: - try: - display_name = self._format_model_display_name(model_id, meta) - except Exception as e: - self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") - display_name = model_id - success = True - else: - display_name = model_id - meta = {} - success = False - - available_models[model_id] = { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local", - "metadata_success": success, - "model_info": meta.get("model_info", {}), - "capabilities": meta.get("capabilities", []), - "details": meta.get("details", {}), - "modified_at": meta.get("modified_at", None), - } - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: 
removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - - - def get_model_metadata(self, model_name): - """ - Get metadata for a specific model from the Ollama server. - - :param str model_name: Model name (e.g. "llama3:8b") - :return dict or None: Metadata dict on success, None on failure - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot get model metadata - no LLM server configured") - return None - - headers = self._get_llm_headers() - try: - response = requests.post( - f"{llm_server}/api/show", - headers=headers, - json={"model": model_name}, - timeout=10 - ) - if response.status_code == 200: - return response.json() - else: - self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - server returned {response.status_code}") - return None - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - request error: {e}") - return None - - def pull_model(self, model_name): - """ - Pull a model from the Ollama registry. - - :param str model_name: Model name (e.g. 
"llama3:8b") - :return bool: True on success - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot pull model - no LLM server configured") - return False - - headers = self._get_llm_headers() - try: - # stream=False waits for the pull to complete before returning - response = requests.post( - f"{llm_server}/api/pull", - headers=headers, - json={"model": model_name, "stream": False}, - timeout=600 - ) - if response.status_code == 200: - self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") - return True - else: - self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}") - return False - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}") - return False - - def delete_model(self, model_name): - """ - Delete a model from the Ollama server. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot delete model - no LLM server configured") - return False - - headers = self._get_llm_headers() - try: - response = requests.delete( - f"{llm_server}/api/delete", - headers=headers, - json={"model": model_name}, - timeout=30 - ) - if response.status_code == 200: - self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") - return True - else: - self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}") - return False - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}") - return False + """ + Manage Ollama LLM models + + Periodically refreshes the list of available models from an Ollama server. 
+ Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + + Job remote_id: + - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-ollama" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-ollama-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + model_name = self.job.data["remote_id"] + + if task == "refresh": + self.refresh_models() + elif task == "pull": + success = self.pull_model(model_name) + if success: + self.refresh_models() + elif task == "delete": + success = self.delete_model(model_name) + if success: + self.refresh_models() + else: + self.log.warning(f"OllamaManager: unknown task '{task}'") + + self.job.finish() + + def _get_client(self) -> OllamaClient: + """Return a fresh OllamaClient configured from 4CAT settings.""" + if not self.client: + self.client = OllamaClient.from_config(self.config) + return self.client + + def refresh_models(self): + """ + Query the Ollama server for available models and update llm.available_models. 
+ """ + if not self.config.get("llm.server", ""): + return + + client = self._get_client() + models = client.list_models() + + if not models and not self.config.get("llm.server", ""): + return + + available_models = {} + for model in models: + model_id = model["name"] + meta = client.show_model(model_id) + if meta: + try: + display_name = OllamaClient.format_display_name(model_id, meta) + except Exception as e: + self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") + display_name = model_id + else: + self.log.debug(f"OllamaManager: could not get metadata for {model_id}, using name only") + display_name = model_id + + available_models[model_id] = OllamaClient.build_model_entry(model_id, display_name, meta) + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + + def pull_model(self, model_name): + """ + Pull a model from the Ollama registry. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + if not self.config.get("llm.server", ""): + self.log.warning("OllamaManager: cannot pull model - no LLM server configured") + return False + + success = self._get_client().pull_model(model_name) + if success: + self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") + else: + self.log.warning(f"OllamaManager: could not pull model '{model_name}'") + return success + + def delete_model(self, model_name): + """ + Delete a model from the Ollama server. 
+ + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + if not self.config.get("llm.server", ""): + self.log.warning("OllamaManager: cannot delete model - no LLM server configured") + return False + + success = self._get_client().delete_model(model_name) + if success: + self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") + else: + self.log.warning(f"OllamaManager: could not delete model '{model_name}'") + return success diff --git a/common/lib/ollama_client.py b/common/lib/ollama_client.py new file mode 100644 index 000000000..96438a2f1 --- /dev/null +++ b/common/lib/ollama_client.py @@ -0,0 +1,296 @@ +""" +Centralized HTTP client for communicating with an Ollama server. + +This class owns all direct HTTP calls to Ollama's REST API and provides shared static +helpers for capability parsing, display-name formatting, and building canonical +llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. +""" + +import re +import requests + +from typing import Optional + + +class OllamaClient: + """ + HTTP client for an Ollama server. + + :param base_url: Base URL of the Ollama server (e.g. "http://localhost:11434"). + :param api_key: Optional API key for authentication. + :param auth_type: Header name to use for the API key (e.g. "Authorization"). + :param timeout: Default request timeout in seconds. 
+ """ + + def __init__(self, base_url: str, api_key: Optional[str] = None, + auth_type: Optional[str] = None, timeout: int = 10) -> None: + self.base_url = base_url.rstrip("/") + self.api_key = api_key + self.auth_type = auth_type + self.timeout = timeout + self._session = requests.Session() + + def _headers(self) -> dict: + """Build request headers, including auth if configured.""" + headers = {"Content-Type": "application/json"} + if self.api_key and self.auth_type: + headers[self.auth_type] = self.api_key + return headers + + def list_models(self) -> list[dict]: + """List available models from the Ollama server. + + :returns: List of model dicts from ``/api/tags``, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}/api/tags", + headers=self._headers(), + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get("models", []) + except requests.RequestException: + pass + return [] + + def show_model(self, model_id: str) -> dict | None: + """Fetch full metadata for a model via ``POST /api/show``. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: Parsed response dict, or ``None`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/show", + headers=self._headers(), + json={"model": model_id}, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json() + except requests.RequestException: + pass + return None + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. 
+ """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers(), + json={"model": model_id, "stream": stream}, + timeout=600, + ) + return r.status_code == 200 + except requests.RequestException: + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers(), + json={"model": model_id}, + timeout=30, + ) + return r.status_code == 200 + except requests.RequestException: + return False + + @staticmethod + def parse_supported_media_types(meta: dict | None) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). 
+ """ + if meta is None: + return [] + + capabilities = meta.get("capabilities", []) + media_types: list[str] = [] + + _cap_map = { + "completion": "text", + "vision": "image", + "embedding": "embedding", + } + for cap in capabilities: + mapped = _cap_map.get(cap) + if mapped and mapped not in media_types: + media_types.append(mapped) + + # Fallback: GGUF-level vision signals when capabilities list gives no image info + if "image" not in media_types: + details = meta.get("details", {}) + model_info = meta.get("model_info", {}) + projector_info = meta.get("projector_info") + + has_clip_family = "clip" in (details.get("families") or []) + has_vision_keys = any(k.startswith("vision.") for k in model_info) + has_projector = bool(projector_info) + + if has_clip_family or has_vision_keys or has_projector: + media_types.append("image") + + return media_types + + @staticmethod + def format_display_name(model_id: str, meta: dict | None) -> str: + """Build a human-readable display name for a model. + + Logic is identical to the legacy ``OllamaManager._format_model_display_name`` + and has been moved here so it can be shared across OllamaManager and any + other caller without importing the worker class. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. 
+ """ + model_info = meta.get("model_info", {}) if meta else {} + details = meta.get("details", {}) if meta else {} + + basename = None + for key in ("general.basename", "general.base_model.0.name"): + val = model_info.get(key) + if val: + basename = str(val).strip() + break + if not basename: + basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id + + def _parse_param_count(val): + if val is None: + return None + if isinstance(val, int): + return val + if isinstance(val, float): + return int(val) + s = str(val).strip().replace(",", "") + if not s: + return None + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) + if m: + num = float(m.group(1)) + suf = m.group(2).upper() + return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) + try: + return int(float(s)) + except Exception: + return None + + def _humanize(n): + if n is None: + return None + n = int(n) + if n >= 1_000_000_000: + x = n / 1_000_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith(".0"): + s = s[:-2] + return f"{s}B" + if n >= 1_000_000: + x = n / 1_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith(".0"): + s = s[:-2] + return f"{s}M" + return f"{n:,}" + + param_candidate = None + for key in ("parameter_size", "parameter_count"): + if key in details: + param_candidate = details.get(key) + break + if param_candidate is None: + param_candidate = model_info.get("general.parameter_count") + human = _humanize(_parse_param_count(param_candidate)) + + size_label = model_info.get("general.size_label") + size_label_norm = str(size_label).strip() if size_label else None + + tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None + + suffix = None + if tag: + tl = tag.lower() + if tl in ("latest", "stable", "current"): + suffix = f"{tag} · {human}" if human else tag + else: + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", tag) + if m: + tag_size = f"{m.group(1)}{m.group(2).upper()}" + if 
size_label_norm and size_label_norm.upper() == tag_size.upper(): + suffix = size_label_norm + else: + suffix = tag_size + else: + suffix = f"{tag} · {human}" if human else tag + else: + if size_label_norm: + suffix = size_label_norm + elif human: + suffix = human + else: + return model_id + + return f"{basename} ({suffix})" + + @staticmethod + def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> dict: + """Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw Ollama model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + has_meta = bool(meta) + return { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local", + "metadata_success": has_meta, + "model_info": meta.get("model_info", {}) if has_meta else {}, + "capabilities": meta.get("capabilities", []) if has_meta else [], + "details": meta.get("details", {}) if has_meta else {}, + "modified_at": meta.get("modified_at", None) if has_meta else None, + "supported_media_types": OllamaClient.parse_supported_media_types(meta), + } + + @classmethod + def from_config(cls, config) -> "OllamaClient": + """Instantiate an OllamaClient from 4CAT config. + + Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. + + :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. + :returns: Configured ``OllamaClient``. 
+ """ + return cls( + base_url=config.get("llm.server", ""), + api_key=config.get("llm.api_key", "") or None, + auth_type=config.get("llm.auth_type", "") or None, + ) From a79657b8fd8076f10b0ec1437c60b29e3c21c70e Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 16:03:40 +0200 Subject: [PATCH 12/53] list capabilities in admin panel --- webtool/templates/controlpanel/llm-server.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index d8a7dbd53..22a5573dd 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -62,6 +62,7 @@

Available Models Model Display name + Capabilities Status Actions @@ -78,6 +79,9 @@

Available Models {{ model.name }} {% endif %} + + {{ model.capabilities | join(", ") }} + {% if model_id in enabled_models %} Enabled From 43de49b1f1406dddfe62ab4559a39945ce7af583 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Wed, 15 Apr 2026 12:55:19 +0200 Subject: [PATCH 13/53] ollama_manager: check for connection first, ollama_client: accept logger --- backend/workers/ollama_manager.py | 9 ++++-- common/lib/ollama_client.py | 49 ++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 09dafea3a..91b8bfac1 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -36,6 +36,11 @@ def work(self): task = self.job.details.get("task", "refresh") if self.job.details else "refresh" model_name = self.job.data["remote_id"] + self.client = self._get_client() # Initialize client once per job run + if not self.client.is_available(): + self.job.finish() + return + if task == "refresh": self.refresh_models() elif task == "pull": @@ -54,7 +59,7 @@ def work(self): def _get_client(self) -> OllamaClient: """Return a fresh OllamaClient configured from 4CAT settings.""" if not self.client: - self.client = OllamaClient.from_config(self.config) + self.client = OllamaClient.from_config(self.config, log=self.log) return self.client def refresh_models(self): @@ -64,7 +69,7 @@ def refresh_models(self): if not self.config.get("llm.server", ""): return - client = self._get_client() + client = self._get_client() models = client.list_models() if not models and not self.config.get("llm.server", ""): diff --git a/common/lib/ollama_client.py b/common/lib/ollama_client.py index 96438a2f1..5d7bea1da 100644 --- a/common/lib/ollama_client.py +++ b/common/lib/ollama_client.py @@ -23,12 +23,13 @@ class OllamaClient: """ def __init__(self, base_url: str, api_key: Optional[str] = None, - auth_type: Optional[str] = None, timeout: int = 10) -> None: + 
auth_type: Optional[str] = None, timeout: int = 10, log=None) -> None: self.base_url = base_url.rstrip("/") self.api_key = api_key self.auth_type = auth_type self.timeout = timeout self._session = requests.Session() + self.log = log def _headers(self) -> dict: """Build request headers, including auth if configured.""" @@ -36,6 +37,22 @@ def _headers(self) -> dict: if self.api_key and self.auth_type: headers[self.auth_type] = self.api_key return headers + + def is_available(self) -> bool: + """Check if the Ollama server is reachable and responding to /api/tags.""" + try: + r = self._session.get( + f"{self.base_url}/api/tags", + headers=self._headers(), + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning(f"OllamaClient: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: server is not available at {self.base_url}: {e}") + return False def list_models(self) -> list[dict]: """List available models from the Ollama server. 
@@ -50,8 +67,11 @@ def list_models(self) -> list[dict]: ) if r.status_code == 200: return r.json().get("models", []) - except requests.RequestException: - pass + if self.log: + self.log.warning(f"OllamaClient: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to list models from {self.base_url}: {e}") return [] def show_model(self, model_id: str) -> dict | None: @@ -69,8 +89,11 @@ def show_model(self, model_id: str) -> dict | None: ) if r.status_code == 200: return r.json() - except requests.RequestException: - pass + if self.log: + self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}: {e}") return None def pull_model(self, model_id: str, stream: bool = False) -> bool: @@ -87,8 +110,12 @@ def pull_model(self, model_id: str, stream: bool = False) -> bool: json={"model": model_id, "stream": stream}, timeout=600, ) + if r.status_code != 200 and self.log: + self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 - except requests.RequestException: + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}: {e}") return False def delete_model(self, model_id: str) -> bool: @@ -104,8 +131,12 @@ def delete_model(self, model_id: str) -> bool: json={"model": model_id}, timeout=30, ) + if r.status_code != 200 and self.log: + self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 - except requests.RequestException: + except requests.RequestException as 
e: + if self.log: + self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}: {e}") return False @staticmethod @@ -281,16 +312,18 @@ def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> di } @classmethod - def from_config(cls, config) -> "OllamaClient": + def from_config(cls, config, log=None) -> "OllamaClient": """Instantiate an OllamaClient from 4CAT config. Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. + :param log: A logging instance for reporting issues. :returns: Configured ``OllamaClient``. """ return cls( base_url=config.get("llm.server", ""), api_key=config.get("llm.api_key", "") or None, auth_type=config.get("llm.auth_type", "") or None, + log=log, ) From c8da75f438a8f29b935029089cf55f9262c80de6 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 19 May 2026 11:59:40 +0200 Subject: [PATCH 14/53] Multi-form! --- .jshintrc | 8 ++ common/lib/config_definition.py | 94 ++++++++++-------- common/lib/user_input.py | 38 ++++++++ extensions | 1 + processors/metrics/rank_attribute.py | 3 +- webtool/__init__.py | 4 + webtool/lib/template_filters.py | 23 +++++ webtool/static/css/stylesheet.css | 31 ++++++ webtool/static/js/fourcat.js | 1 + webtool/static/js/modules/multi-form.js | 95 +++++++++++++++++++ webtool/static/js/modules/ui-helpers.js | 2 +- webtool/static/js/modules/util.js | 22 +++++ .../components/datasource-option.html | 32 ++++++- webtool/views/views_admin.py | 8 +- 14 files changed, 314 insertions(+), 48 deletions(-) create mode 100644 .jshintrc create mode 120000 extensions create mode 100644 webtool/static/js/modules/multi-form.js diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 000000000..7a6412586 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,8 @@ +{ + "esversion": 11, + "undef": true, + "globals": { + "$": false, + "document": false, + } +} \ No newline at end of file diff --git 
a/common/lib/config_definition.py b/common/lib/config_definition.py index daf20020d..c64f8d633 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -443,16 +443,30 @@ "global": True }, "flask.autologin.hostnames": { - "type": UserInput.OPTION_TEXT_JSON, + "type": UserInput.OPTION_MULTI_OPTION, "default": [], + "options": { + "hostname": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Host name or IP address" + } + }, "help": "White-listed hostnames", "tooltip": "A list of host names or IP addresses to automatically log in. Docker should include localhost and " "Server Name. Front-end needs to be restarted for changed to apply.", "global": True }, "flask.autologin.api": { - "type": UserInput.OPTION_TEXT_JSON, + "type": UserInput.OPTION_MULTI_OPTION, "default": [], + "options": { + "hostname": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Host name or IP address" + } + }, "help": "White-list for API", "tooltip": "A list of host names or IP addresses to allow access to API endpoints with no rate limiting. " "Docker should include localhost and Server Name. Front-end needs to be restarted for changed to " @@ -579,43 +593,47 @@ "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." 
}, - "llm.host_name": { - "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", - "help": "Name of LLM Server in UI", - "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", - "global": True - }, - "llm.provider_type": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM Provider Type", - "default": "none", - "options": { - "ollama": "Ollama", - "none": "None", - }, - "global": True, - }, - "llm.server": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", - "global": True - }, - "llm.auth_type": { - "type": UserInput.OPTION_TEXT, - "help": "LLM Server Authentication Type", - "default": "", - "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.", + "llm.services": { + "type": UserInput.OPTION_MULTI_OPTION, + "default": [], "global": True, - }, - "llm.api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server API Key", - "tooltip": "The API key to access the LLM server, if required.", - "global": True + "help": "LLM providers", + "options": { + "host_name": { + "type": UserInput.OPTION_TEXT, + "default": "4CAT LLM Server", + "help": "Name of LLM Server in UI", + "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", + }, + "provider_type": { + "type": UserInput.OPTION_CHOICE, + "help": "LLM Provider Type", + "default": "none", + "options": { + "ollama": "Ollama", + "litellm": "LiteLLM", + "none": "None", + }, + }, + "server": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "LLM Server URL", + "tooltip": "The URL of the LLM server, e.g. 
http://localhost:5000", + }, + "auth_type": { + "type": UserInput.OPTION_TEXT, + "help": "Authentication Header", + "default": "", + "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.", + }, + "api_key": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Authentication Key", + "tooltip": "The API key to access the LLM server, if required.", + }, + } }, "llm.available_models": { "type": UserInput.OPTION_TEXT_JSON, diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 7fcb6bcb9..16a583d74 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -26,6 +26,7 @@ class UserInput: OPTION_TEXT = "string" # simple string or integer (input text) OPTION_MULTI = "multi" # multiple values out of a list (select multiple) OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple) + OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls OPTION_INFO = "info" # just a bit of text, not actual input OPTION_TEXT_LARGE = "textarea" # longer text OPTION_TEXT_JSON = "json" # text, but should be valid JSON @@ -70,6 +71,8 @@ def parse_all(options, input, silently_correct=True): if type(input) is not dict and type(input) is not ImmutableMultiDict: raise TypeError("input must be a dictionary or ImmutableMultiDict") + print(input) + if type(input) is ImmutableMultiDict: # we are not using to_dict, because that messes up multi-selects input = {key: input.getlist(key) for key in input} @@ -181,6 +184,41 @@ def parse_all(options, input, silently_correct=True): parsed_input[option] = table_input + elif settings.get("type") == UserInput.OPTION_MULTI_OPTION: + # these are collections of other input options that can be + # repeated an arbitrary amount of times and are saved as a + # list of these values + # i.e. forms within forms!!! 
+ item_options = settings["options"] + input_items = {} + for key, value in input.items(): + if key_match := re.match(f"{option}-([0-9]+)-(.+)", key): + input_index = int(key_match[1]) + # note: the index is just used to match inputs to items + # it is not used for ordering + option_item = key_match[2] + if option_item not in item_options: + continue + + if input_index not in input_items: + input_items[input_index] = {} + + print(key, value) + input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) + + # discard items that are only default values + parsed_input[option] = [] + for input_index, item in input_items.items(): + only_default = True + for key, value in item.items(): + if value != item_options[key]["default"]: + only_default = False + + if not only_default: + parsed_input[option].append(item) + + print(parsed_input[option]) + elif option not in input: # not provided? use default parsed_input[option] = settings.get("default", None) diff --git a/extensions b/extensions new file mode 120000 index 000000000..c25d13e68 --- /dev/null +++ b/extensions @@ -0,0 +1 @@ +/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions \ No newline at end of file diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 2f50d623a..3c7c03286 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -86,7 +86,8 @@ def get_options(cls, parent_dataset=None, config=None): "hostnames": "Domain names", "level2-hostnames": "Second-level domain names (e.g. 
m.youtube.com -> youtube.com)", "hashtags": "Hashtags (words starting with #)", - "emoji": "Emoji (each used emoji in the column is counted individually)" + "emoji": "Emoji (each used emoji in the column is counted individually)", + "occurrence": "Values (the number of comma-separated values in the given field)" }, "help": "Extract from column", "tooltip": "This can be used to extract more specific values from the value of the selected column(s); for " diff --git a/webtool/__init__.py b/webtool/__init__.py index e807b7ae0..e7359c423 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -241,6 +241,10 @@ def get_datasource_explorer_templates(name): [FileSystemLoader(template_paths), FunctionLoader(get_datasource_explorer_templates)] ) + # enable to {% do %} tag in jinja + app.jinja_env.add_extension("jinja2.ext.do") + app.jinja_env.add_extension("jinja2.ext.debug") + # import custom jinja2 template filters # these also benefit from current_app import webtool.lib.template_filters # noqa: E402 diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 5682c6dbc..81b48ad55 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -195,6 +195,25 @@ def _jinja2_filter_markdown(text, trim_container=False): def _jinja2_filter_isbool(value): return isinstance(value, bool) +@current_app.template_filter('propmap') +def _jinja2_filter_propmap(data, property, default=None): + """ + Select a property from a sequence of dicts + + To map `{a: b: {prop: value}}` to `{a: value}` for a given `prop`. If + `data` is a dict, preserve key:value pairs. If the property does not exist + in a sequence item, use the `default` value. 
+ + :param data: Sequence or dict to map + :param property: Property to use for mapping + :param default: Value to use if property does not exist in item + :return: Mapped sequence or dict + """ + if type(data) is dict: + return {k: v.get(property, default) for k, v in data.items()} + else: + return [v.get(property, default) for v in data.values()] + @current_app.template_filter('json') def _jinja2_filter_json(data): return json.dumps(data) @@ -429,6 +448,10 @@ def explorer_css(datasource, scope_class="explorer-content-container"): def _jinja2_filter_hasattr(obj, attribute): return hasattr(obj, attribute) +@current_app.template_filter('debug') +def _jinja2_filter_debug(value): + print(value) + @current_app.context_processor def inject_now(): def uniqid(): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index fecc5cfd7..a4097952f 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -330,6 +330,37 @@ article.small .form-element select[multiple] { top: -0.4em; } +.form-multi-option-wrapper, .form-multi-option-header { + margin: 0 1em; +} + +.form-multi-option-wrapper { + margin: 1em; +} + +.form-multi-option-header { + +} + +.form-multi-option-wrapper li { + border-left: 2px solid var(--accent); + margin-bottom: 4px; + position: relative; + margin-left: 20px; +} + +.form-multi-option-wrapper li::before { + content: ''; + position: absolute; + top: 0; + left: -17px; + border-left: 2px solid var(--contrast-dark); + border-bottom: 2px solid var(--contrast-dark); + width: 15px; + height: 15px; + +} + .data-overview-link { display: block; font-size: 13px; diff --git a/webtool/static/js/fourcat.js b/webtool/static/js/fourcat.js index e84dc8374..c4fd3762c 100644 --- a/webtool/static/js/fourcat.js +++ b/webtool/static/js/fourcat.js @@ -6,6 +6,7 @@ async function load() { await import("./modules/dataset-page.js"), await import("./modules/dynamic-container.js"), await import("./modules/multichoice.js"), + 
await import("./modules/multi-form.js"), await import("./modules/popup.js"), await import("./modules/run-processor.js"), await import("./modules/tooltip.js"), diff --git a/webtool/static/js/modules/multi-form.js b/webtool/static/js/modules/multi-form.js new file mode 100644 index 000000000..389f5d9a8 --- /dev/null +++ b/webtool/static/js/modules/multi-form.js @@ -0,0 +1,95 @@ +import {find_parent, reset_form_elements} from "./util.js"; + +export const multiForm = { + init: function () { + const actions = document.createElement('div'); + actions.className = 'multi-form-actions'; + + const add_button = document.createElement('button'); + add_button.className = 'add-button'; + add_button.textContent = '+'; + add_button.addEventListener('click', multiForm.add_item); + + const delete_button = document.createElement('button'); + delete_button.className = 'delete-button'; + delete_button.textContent = 'x'; + delete_button.addEventListener('click', multiForm.delete_item); + + actions.appendChild(add_button); + actions.appendChild(delete_button); + + document.querySelectorAll('.form-multi-option-wrapper').forEach(function (el) { + el.addEventListener('click', multiForm.handle_click); + el.querySelectorAll('li').forEach(function (el) { + const el_actions = actions.cloneNode(true); + el.appendChild(el_actions); + }) + }) + }, + + handle_click: function (e) { + if(!(e.target.classList.contains('add-button') || e.target.classList.contains('delete-button'))) { + return true; + } + e.preventDefault(); + const wrapper = find_parent(e.target, 'ol'); + if(e.target.classList.contains('delete-button')){ + multiForm.delete_item(e); + } else { + multiForm.add_item(e); + } + multiForm.renumber(wrapper); + }, + + add_item: function (e) { + const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); + const last_li = find_parent(e.target, 'li'); + const clone = last_li.cloneNode(true); + reset_form_elements(clone) + ol.appendChild(clone); + }, + + delete_item: function (e) { + 
if(!confirm("Are you sure?")){ + return false; + } + + const li = find_parent(e.target, 'li'); + const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); + + if(ol.querySelectorAll('li').length > 1) { + li.parentNode.removeChild(li); + } else { + // last element; do not remove, but reset to default + reset_form_elements(li); + } + }, + + renumber: function(parent) { + let index = 1; + parent.querySelectorAll('li').forEach(function (el) { + el.setAttribute('data-multi-option-index', index); + multiForm.renumber_items(el, index); + index += 1; + }) + }, + + renumber_items: function(parent, index) { + const attributes = ['for', 'id', 'name']; + parent.childNodes.forEach(child => { + if (!(child instanceof HTMLElement)) { + return; + } + for(const attribute of attributes) { + console.log(child); + if(child.hasAttribute(attribute)) { + child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9+]-/, `-${index}-`)); + } + } + multiForm.renumber_items(child, index); + }); + } + +} + +export const module = multiForm; \ No newline at end of file diff --git a/webtool/static/js/modules/ui-helpers.js b/webtool/static/js/modules/ui-helpers.js index 8b0ef097f..4c9b31ebe 100644 --- a/webtool/static/js/modules/ui-helpers.js +++ b/webtool/static/js/modules/ui-helpers.js @@ -1,5 +1,5 @@ import {popup} from "./popup.js"; -import {find_parent} from "./util.js"; +import {find_parent, hsv2hsl} from "./util.js"; export const ui_helpers = { /** diff --git a/webtool/static/js/modules/util.js b/webtool/static/js/modules/util.js index 6f17d4ec9..1839c13f1 100644 --- a/webtool/static/js/modules/util.js +++ b/webtool/static/js/modules/util.js @@ -219,4 +219,26 @@ export function hsv2rgb(h, s, v) { } return [r * 255, g * 255, b * 255]; +} + +/** + * Recursively reset form elements to default value + * + * We don't use form.reset() for two reasons: + * - We may want to reset a subset of a form + * - The *current* value of an element may not be the designated *default* + * 
value of the element + * + * @param parent Parent node to recursively reset child nodes of + */ +export function reset_form_elements(parent) { + parent.childNodes.forEach(child => { + if(child instanceof HTMLElement) { + if (child.hasAttribute('data-default')) { + child.value = child.getAttribute('data-default'); + } else { + reset_form_elements(child); + } + } + }) } \ No newline at end of file diff --git a/webtool/templates/components/datasource-option.html b/webtool/templates/components/datasource-option.html index 2a3cbb4a9..867a339c8 100644 --- a/webtool/templates/components/datasource-option.html +++ b/webtool/templates/components/datasource-option.html @@ -5,6 +5,28 @@
{{ settings.help|markdown|safe }}
{% elif settings.type in ["annotation", "annotations"] %} {# pass - a datasource should never need to annotate itself; this is for processor options only #} + {% elif settings.type == "multi_option" %} +
+

{{ settings.help }}

+
+
    + {# always include an empty item #} + {% set empty_item = settings.options|propmap("default") %} + {% do settings.default.append(empty_item) %} + {% for item in settings.default %} + {% set outerloop = loop %} + {% set last_index = outerloop.index %} +
  1. + {% for sub_option, sub_settings in settings.options.items() %} + {% do sub_settings.update({"original_default": sub_settings.default, "default": item[sub_option]}) %} + {% set sub_option = option ~ "-" ~ outerloop.index ~ "-" ~ sub_option %} + {% with option=sub_option, settings=sub_settings %} + {% include "components/datasource-option.html" %} + {% endwith %} + {% endfor %} +
  2. + {% endfor %} +
{% else %}
@@ -21,7 +43,7 @@

{% endif %} {% elif settings.type == "string" %} - 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}"> + 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}" data-default="{{ settings.original_default }}"> {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +77,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "choice" %} - {% for value, label in settings.options.items() %} {% endfor %} @@ -105,7 +127,7 @@ {% elif settings.type == "hue" %}
{% set hue_id = uniqid() %} - diff --git a/webtool/views/views_admin.py b/webtool/views/views_admin.py index 3b425bcef..301dc7138 100644 --- a/webtool/views/views_admin.py +++ b/webtool/views/views_admin.py @@ -666,9 +666,10 @@ def manipulate_settings(): global_value = global_settings.get(option, definition.get(option, {}).get("default")) is_changed = tag and global_value != tag_value - default = all_settings.get(option, definition.get(option, {}).get("default")) + default = definition.get(option, {}).get("default") + current_value = all_settings.get(option, definition.get(option, {}).get("default")) if definition.get(option, {}).get("type") == UserInput.OPTION_TEXT_JSON: - default = json.dumps(default) + current_value = json.dumps(current_value) # this is used for organising things in the UI option_owner = option.split(".")[0] @@ -694,7 +695,8 @@ def manipulate_settings(): "default": all_settings.get(option) }), "submenu": submenu, - "default": default, + "default": current_value, # override default so this is the value displayed in the web UI + "original_default": default, # but also save the actual default "tabname": tabname, "is_changed": is_changed } From 4c429dfdf47a9c72fe84a19dcfc0676605dd75ad Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 16:49:59 +0200 Subject: [PATCH 15/53] Refactor everything --- backend/workers/llm_manager.py | 83 ++++++ backend/workers/ollama_manager.py | 139 --------- backend/workers/refresh_items.py | 42 --- common/assets/llms.json | 268 +++++++++--------- common/lib/config_definition.py | 31 +- common/lib/llm/__init__.py | 0 common/lib/{llm.py => llm/adapter.py} | 211 +++++--------- common/lib/llm/litellm_client.py | 62 ++++ common/lib/llm/llm_client.py | 190 +++++++++++++ common/lib/llm/lmstudio_client.py | 69 +++++ common/lib/{ => llm}/ollama_client.py | 234 ++++----------- common/lib/llm/thirdparty_client.py | 56 ++++ common/lib/llm/vllm_client.py | 59 ++++ processors/machine_learning/llm_prompter.py | 265 
+++-------------- webtool/lib/template_filters.py | 12 + webtool/static/css/stylesheet.css | 12 + webtool/static/js/modules/multi-form.js | 14 +- .../templates/controlpanel/llm-server.html | 66 +++-- webtool/views/views_llm.py | 40 ++- 19 files changed, 935 insertions(+), 918 deletions(-) create mode 100644 backend/workers/llm_manager.py delete mode 100644 backend/workers/ollama_manager.py create mode 100644 common/lib/llm/__init__.py rename common/lib/{llm.py => llm/adapter.py} (60%) create mode 100644 common/lib/llm/litellm_client.py create mode 100644 common/lib/llm/llm_client.py create mode 100644 common/lib/llm/lmstudio_client.py rename common/lib/{ => llm}/ollama_client.py (50%) create mode 100644 common/lib/llm/thirdparty_client.py create mode 100644 common/lib/llm/vllm_client.py diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py new file mode 100644 index 000000000..3b279de36 --- /dev/null +++ b/backend/workers/llm_manager.py @@ -0,0 +1,83 @@ +""" +Manage LLM models +""" +from backend.lib.worker import BasicWorker +from common.lib.llm.llm_client import LLMProviderClient + +import json + +class LLMProviderManager(BasicWorker): + """ + Manages LLM models + + Periodically refreshes the list of available models from an LLM provider. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + - provider: the URL of the LLM provider, as configured in the + llm.providers setting. 
if not given, run on all providers + + Job remote_id: + - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-llm" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-llm-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + provider = self.job.details.get("provider", "") if self.job.details else None + model_name = self.job.data["remote_id"] + available_models = None + + for provider_config in self.config.get("llm.providers", []): + if provider and provider != provider_config["url"]: + continue + + try: + client = LLMProviderClient.get_client(self.config, provider_config) + except ValueError: + self.log.debug(f"{self.__class__.__name__}: invalid provider type: {provider_config['type']}, skipping") + continue + + # note that technically it is possible to pull/delete a model on + # multiple providers at once (if a model_name is defined but no + # provider). may not be a problem? may be useful one day? 
+ success = False + if task == "pull" and hasattr(client, "pull_model"): + success = client.pull_model(model_name) + + elif task == "delete" and hasattr(client, "delete_model"): + success = client.delete_model(model_name) + + if success or task == "refresh": + # refresh models after pulling/deleting, or when asked to + if available_models is None: + available_models = {} + + for model in client.list_models(): + model = client.build_model_entry(model) + available_models[model["id"]] = model + + self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})") + + elif success is None: + self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client") + else: + self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") + + if available_models is not None: + self.config.set("llm.available_models", available_models) + + self.job.finish() diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py deleted file mode 100644 index 91b8bfac1..000000000 --- a/backend/workers/ollama_manager.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -Manage Ollama LLM models -""" -from backend.lib.worker import BasicWorker -from common.lib.ollama_client import OllamaClient - - -class OllamaManager(BasicWorker): - """ - Manage Ollama LLM models - - Periodically refreshes the list of available models from an Ollama server. - Can also pull or delete models on demand when queued with a specific task. 
- - Job details: - - task: "refresh" (default), "pull", or "delete" - - Job remote_id: - - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) - - For pull/delete: the model name to pull or delete - """ - type = "manage-ollama" - max_workers = 1 - client = None - - @classmethod - def ensure_job(cls, config=None): - """ - Ensure the daily refresh job is always scheduled - - :return: Job parameters for the worker - """ - return {"remote_id": "manage-ollama-refresh", "interval": 86400} - - def work(self): - task = self.job.details.get("task", "refresh") if self.job.details else "refresh" - model_name = self.job.data["remote_id"] - - self.client = self._get_client() # Initialize client once per job run - if not self.client.is_available(): - self.job.finish() - return - - if task == "refresh": - self.refresh_models() - elif task == "pull": - success = self.pull_model(model_name) - if success: - self.refresh_models() - elif task == "delete": - success = self.delete_model(model_name) - if success: - self.refresh_models() - else: - self.log.warning(f"OllamaManager: unknown task '{task}'") - - self.job.finish() - - def _get_client(self) -> OllamaClient: - """Return a fresh OllamaClient configured from 4CAT settings.""" - if not self.client: - self.client = OllamaClient.from_config(self.config, log=self.log) - return self.client - - def refresh_models(self): - """ - Query the Ollama server for available models and update llm.available_models. 
- """ - if not self.config.get("llm.server", ""): - return - - client = self._get_client() - models = client.list_models() - - if not models and not self.config.get("llm.server", ""): - return - - available_models = {} - for model in models: - model_id = model["name"] - meta = client.show_model(model_id) - if meta: - try: - display_name = OllamaClient.format_display_name(model_id, meta) - except Exception as e: - self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") - display_name = model_id - else: - self.log.debug(f"OllamaManager: could not get metadata for {model_id}, using name only") - display_name = model_id - - available_models[model_id] = OllamaClient.build_model_entry(model_id, display_name, meta) - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - - def pull_model(self, model_name): - """ - Pull a model from the Ollama registry. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - if not self.config.get("llm.server", ""): - self.log.warning("OllamaManager: cannot pull model - no LLM server configured") - return False - - success = self._get_client().pull_model(model_name) - if success: - self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") - else: - self.log.warning(f"OllamaManager: could not pull model '{model_name}'") - return success - - def delete_model(self, model_name): - """ - Delete a model from the Ollama server. 
- - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - if not self.config.get("llm.server", ""): - self.log.warning("OllamaManager: cannot delete model - no LLM server configured") - return False - - success = self._get_client().delete_model(model_name) - if success: - self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") - else: - self.log.warning(f"OllamaManager: could not delete model '{model_name}'") - return success diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 1a9811603..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -23,46 +23,4 @@ class ItemUpdater(BasicWorker): def work(self): # Placeholder – no tasks implemented yet. self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, 
json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file diff --git a/common/assets/llms.json b/common/assets/llms.json index 835dbaa09..c17351488 100644 --- a/common/assets/llms.json +++ b/common/assets/llms.json @@ -1,128 +1,140 @@ -{ - "none": { - "name": "", - "model_card": "", - "provider": "", - "default": true - }, - "custom": { - "name": "[custom]", - "model_card": "", - "provider": "" - }, - "gpt-5.4": { - "name": "[OpenAI] GPT-5.4", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4", - "provider": "openai" - }, - "gpt-5-mini": { - "name": "[OpenAI] GPT-5 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", - "provider": "openai" - }, - "gpt-5-nano": { - "name": "[OpenAI] GPT-5 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", - "provider": "openai" - }, - "gpt-5.4-pro": { - "name": "[OpenAI] GPT-5.4 Pro", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", - "provider": "openai" - }, - "gpt-4.1-mini": { - "name": "[OpenAI] GPT-4.1 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", - "provider": "openai" - }, - "gpt-4.1-nano": { - "name": "[OpenAI] GPT-4.1 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", - "provider": "openai" - }, - "gpt-4.1": { - "name": "[OpenAI] GPT-4.1", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1", - "provider": "openai" - }, - "gpt-4o-mini": { - "name": "[OpenAI] GPT-4o mini", - "model_card": 
"https://platform.openai.com/docs/models/gpt-4o-mini", - "provider": "openai" - }, - "gpt-4o": { - "name": "[OpenAI] GPT-4o", - "model_card": "https://platform.openai.com/docs/models/gpt-4o", - "provider": "openai" - }, - "gemini-3.1-pro-preview": { - "name": "[Google] Gemini 3.1 Pro", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", - "provider": "google" - }, - "gemini-3-flash-preview": { - "name": "[Google] Gemini 3 Flash", - "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", - "provider": "google" - }, - "gemini-3.1-flash-lite-preview": { - "name": "[Google] Gemini 3.1 Flash Lite", - "provider": "google", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" - }, - "claude-opus-4-6": { - "name": "[Anthropic] Claude Opus 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/opus", - "provider": "anthropic" - }, - "claude-sonnet-4-6": { - "name": "[Anthropic] Claude Sonnet 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/sonnet", - "provider": "anthropic" - }, - "claude-4-5-haiku": { - "name": "[Anthropic] Claude 4.5 Haiku (latest)", - "model_card": "https://www.anthropic.com/claude/haiku", - "provider": "anthropic" - }, - "magistral-small-2509": { - "name": "[Mistral] Magistral Small 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", - "provider": "mistral" - }, - "magistral-medium-2509": { - "name": "[Mistral] Magistral Medium 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", - "provider": "mistral" - }, - "mistral-small-2506": { - "name": "[Mistral] Mistral Small 3.2 (25.06)", - "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", - "provider": "mistral" - }, - "mistral-medium-2508": { - "name": "[Mistral] Mistral Medium 3.1 (25.08)", - "model_card": "mistral-medium-2508", - "provider": 
"mistral" - }, - "mistral-large-2512": { - "name": "[Mistral] Mistral Large 3 (25.12)", - "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", - "provider": "mistral" - }, - "open-mistral-nemo-2407": { - "name": "[Mistral] Mistral Nemo 12B", - "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", - "provider": "mistral" - }, - "deepseek-chat": { - "name": "[DeepSeek] DeepSeek latest (non-reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - }, - "deepseek-reasoner": { - "name": "[DeepSeek] DeepSeek latest (reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - } -} \ No newline at end of file +[ + { + "model": "gpt-5.4", + "name": "[OpenAI] GPT-5.4", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4", + "provider": "openai" + }, + { + "model": "gpt-5-mini", + "name": "[OpenAI] GPT-5 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", + "provider": "openai" + }, + { + "model": "gpt-5-nano", + "name": "[OpenAI] GPT-5 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", + "provider": "openai" + }, + { + "model": "gpt-5.4-pro", + "name": "[OpenAI] GPT-5.4 Pro", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", + "provider": "openai" + }, + { + "model": "gpt-4.1-mini", + "name": "[OpenAI] GPT-4.1 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", + "provider": "openai" + }, + { + "model": "gpt-4.1-nano", + "name": "[OpenAI] GPT-4.1 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", + "provider": "openai" + }, + { + "model": "gpt-4.1", + "name": "[OpenAI] GPT-4.1", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1", + "provider": "openai" + }, + { + "model": "gpt-4o-mini", + "name": "[OpenAI] GPT-4o mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", + 
"provider": "openai" + }, + { + "model": "gpt-4o", + "name": "[OpenAI] GPT-4o", + "model_card": "https://platform.openai.com/docs/models/gpt-4o", + "provider": "openai" + }, + { + "model": "gemini-3.1-pro-preview", + "name": "[Google] Gemini 3.1 Pro", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", + "provider": "google" + }, + { + "model": "gemini-3-flash-preview", + "name": "[Google] Gemini 3 Flash", + "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", + "provider": "google" + }, + { + "model": "gemini-3.1-flash-lite-preview", + "name": "[Google] Gemini 3.1 Flash Lite", + "provider": "google", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" + }, + { + "model": "claude-opus-4-6", + "name": "[Anthropic] Claude Opus 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/opus", + "provider": "anthropic" + }, + { + "model": "claude-sonnet-4-6", + "name": "[Anthropic] Claude Sonnet 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/sonnet", + "provider": "anthropic" + }, + { + "model": "claude-4-5-haiku", + "name": "[Anthropic] Claude 4.5 Haiku (latest)", + "model_card": "https://www.anthropic.com/claude/haiku", + "provider": "anthropic" + }, + { + "model": "magistral-small-2509", + "name": "[Mistral] Magistral Small 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", + "provider": "mistral" + }, + { + "model": "magistral-medium-2509", + "name": "[Mistral] Magistral Medium 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", + "provider": "mistral" + }, + { + "model": "mistral-small-2506", + "name": "[Mistral] Mistral Small 3.2 (25.06)", + "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", + "provider": "mistral" + }, + { + "model": "mistral-medium-2508", + "name": "[Mistral] Mistral Medium 3.1 
(25.08)", + "model_card": "mistral-medium-2508", + "provider": "mistral" + }, + { + "model": "mistral-large-2512", + "name": "[Mistral] Mistral Large 3 (25.12)", + "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "provider": "mistral" + }, + { + "model": "open-mistral-nemo-2407", + "name": "[Mistral] Mistral Nemo 12B", + "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", + "provider": "mistral" + }, + { + "model": "deepseek-chat", + "name": "[DeepSeek] DeepSeek latest (non-reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + }, + { + "model": "deepseek-reasoner", + "name": "[DeepSeek] DeepSeek latest (reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + } +] \ No newline at end of file diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index c64f8d633..e5c2a726f 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -443,30 +443,16 @@ "global": True }, "flask.autologin.hostnames": { - "type": UserInput.OPTION_MULTI_OPTION, + "type": UserInput.OPTION_TEXT_JSON, "default": [], - "options": { - "hostname": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "Host name or IP address" - } - }, "help": "White-listed hostnames", "tooltip": "A list of host names or IP addresses to automatically log in. Docker should include localhost and " "Server Name. Front-end needs to be restarted for changed to apply.", "global": True }, "flask.autologin.api": { - "type": UserInput.OPTION_MULTI_OPTION, + "type": UserInput.OPTION_TEXT_JSON, "default": [], - "options": { - "hostname": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "Host name or IP address" - } - }, "help": "White-list for API", "tooltip": "A list of host names or IP addresses to allow access to API endpoints with no rate limiting. " "Docker should include localhost and Server Name. 
Front-end needs to be restarted for changed to " @@ -593,41 +579,42 @@ "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." }, - "llm.services": { + "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, "default": [], "global": True, "help": "LLM providers", "options": { - "host_name": { + "name": { "type": UserInput.OPTION_TEXT, "default": "4CAT LLM Server", "help": "Name of LLM Server in UI", "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", }, - "provider_type": { + "type": { "type": UserInput.OPTION_CHOICE, "help": "LLM Provider Type", "default": "none", "options": { "ollama": "Ollama", "litellm": "LiteLLM", + "api": "Third-party models via APIs (OpenAI, Mistral, etc)", "none": "None", }, }, - "server": { + "url": { "type": UserInput.OPTION_TEXT, "default": "", "help": "LLM Server URL", "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", }, - "auth_type": { + "auth_header": { "type": UserInput.OPTION_TEXT, "help": "Authentication Header", "default": "", "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). 
Passed with the Authentication Key as value.", }, - "api_key": { + "auth_key": { "type": UserInput.OPTION_TEXT, "default": "", "help": "Authentication Key", diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py similarity index 60% rename from common/lib/llm.py rename to common/lib/llm/adapter.py index 0901194d1..5a3ece227 100644 --- a/common/lib/llm.py +++ b/common/lib/llm/adapter.py @@ -1,9 +1,11 @@ import json import base64 import mimetypes -import requests + from pathlib import Path from typing import List, Optional, Union + +from langchain_community.chat_models import ChatLiteLLM from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -18,111 +20,89 @@ class LLMAdapter: def __init__( self, - provider: str, - model: str, + config, + model, api_key: Optional[str] = None, - base_url: Optional[str] = None, temperature: float = 0.1, max_tokens: int = 1000, client_kwargs: Optional[dict] = None, ): """ - provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek' - model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.) 
- api_key: API key if required (OpenAI, Claude, Google, Mistral) - base_url: for local models or Mistral custom endpoints - temperature: temperature hyperparameter, - max_tokens: how many output tokens may be used - client_kwargs: additional client parameters + Instantiate an adapter to interface with an LLM model + + :param config: 4CAT config reader + :param model: Model metadata (as in `llm.available_models` 4CAT setting) + :param api_key: API key, if needed + :param temperature: Temperature hyperparameter + :param max_tokens: Max tokens to generate + :param client_kwargs: Optional parameters for the LLM adapter class """ - self.provider = provider.lower() + known_providers = {p['url']: p for p in config.get("llm.providers")} + self.model = model + self.provider = known_providers.get(model['provider']) self.api_key = api_key - self.base_url = base_url self.temperature = temperature self.structured_output = False self.parser = None self.max_tokens = max_tokens self.client_kwargs = dict(client_kwargs) if client_kwargs else {} + self.llm: BaseChatModel = self._load_llm() def _load_llm(self) -> BaseChatModel: - if self.provider == "openai": - kwargs = {} - if "o3" not in self.model: - kwargs["temperature"] = self.temperature # temperature not supported for all models - return ChatOpenAI( - model=self.model, - api_key=SecretStr(self.api_key), - base_url=self.base_url or "https://api.openai.com/v1", - max_tokens=self.max_tokens, - **kwargs - ) - elif self.provider == "google": - return ChatGoogleGenerativeAI( - model=self.model, - temperature=self.temperature, - google_api_key=self.api_key, - max_tokens=self.max_tokens - ) - elif self.provider == "anthropic": - return ChatAnthropic( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - max_tokens=self.max_tokens, - timeout=100, - stop=None - ) - elif self.provider == "mistral": - return ChatMistralAI( - model_name=self.model, - temperature=self.temperature, - 
api_key=SecretStr(self.api_key), - base_url=self.base_url, # Optional override - max_tokens=self.max_tokens, - ) - elif self.provider == "deepseek": - return ChatDeepSeek( - model=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192, - ) - elif self.provider == "ollama": - ollama_adapter = ChatOllama( - model=self.model, - temperature=self.temperature, - base_url=self.base_url or "http://localhost:11434", - max_tokens=self.max_tokens, - client_kwargs=self.client_kwargs - ) - self.model = ollama_adapter.model - return ollama_adapter - elif self.provider in {"vllm", "lmstudio"}: + chat_params = { + "model": self.model["local_id"], + "api_key": SecretStr(self.api_key), + "base_url": self.provider["url"], + "max_tokens": self.max_tokens, + "temperature": self.temperature, + } + + if self.provider["type"] == "openai": + if "o3" in self.model["local_id"]: + del chat_params["temperature"] + chat_params["base_url"] = self.provider["url"] or "https://api.openai.com/v1" + adapter_class = ChatOpenAI + + elif self.provider["type"] == "google": + adapter_class = ChatGoogleGenerativeAI + + elif self.provider["type"] == "anthropic": + chat_params.update({"timeout": 100, "stop": None}) + adapter_class = ChatAnthropic + + elif self.provider["type"] == "mistral": + adapter_class = ChatMistralAI + + elif self.provider["type"] == "deepseek": + chat_params["max_tokens"] = min(self.max_tokens, 8192) + adapter_class = ChatDeepSeek + + elif self.provider["type"] == "ollama": + adapter_class = ChatOllama + chat_params.update({"client_kwargs": self.client_kwargs}) + + elif self.provider["type"] == "litellm": + adapter_class = ChatOpenAI + if self.provider["auth_header"]: + chat_params.update({ + "default_headers": { + self.provider["auth_header"]: self.provider["auth_key"] + } + }) + + elif self.provider["type"] in {"vllm", "lmstudio", "litellm"}: # OpenAI-compatible local servers if
self.provider == "lmstudio" and not self.api_key: self.api_key = "lm-studio" - # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately. - if self.provider == "vllm" and self.model=="vllm_model": - model_name = self.get_vllm_model_name(self.base_url, self.api_key) - self.model = model_name - else: - model_name = self.model if self.model else "lmstudio-model" - - llm = ChatOpenAI( - model=model_name, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens, - ) - self.model = llm.model_name - return llm + adapter_class = ChatOpenAI + else: - raise ValueError(f"Unsupported LLM provider: {self.provider}") + raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}") + + return adapter_class(**chat_params) def generate_text( self, @@ -161,7 +141,7 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model["local_id"]: kwargs = {} try: @@ -304,31 +284,6 @@ def set_structure(self, json_schema): self.llm = self.llm.with_structured_output(json_schema) self.structured_output = True - @staticmethod - def get_model_options(config) -> dict: - """ - Returns model choice options for UserInput - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - options = {model_id: model_values["name"] for model_id, model_values in models.items()} - return options - - @staticmethod - def get_model_providers(config) -> dict: - """ - Returns available model providers through APIs - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - providers = list(set([model_values.get("provider", "") for model_values in models.values()])) - if not providers: - return {} - options = {provider: 
provider.capitalize() for provider in providers if provider} - return options - @staticmethod def get_models(config) -> dict: """ @@ -337,36 +292,6 @@ def get_models(config) -> dict: :returns dict, A dict with model IDs as keys and details as values """ - with ( - config.get("PATH_ROOT") - .joinpath("common/assets/llms.json") - .open() as available_models - ): - available_models = json.loads(available_models.read()) - return available_models - - - @staticmethod - def get_vllm_model_name(base_url: str, api_key: str = None) -> str: - """ - Query vLLM server to get the name of the served model. - """ - - try: - # vLLM exposes available models at /v1/models endpoint - models_url = f"{base_url.rstrip('/')}/models" - headers = {} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - response = requests.get(models_url, headers=headers, timeout=10) - response.raise_for_status() - models_data = response.json() - - # Get the first available model - if models_data.get("data") and len(models_data["data"]) > 0: - return models_data["data"][0]["id"] - else: - raise ValueError("No models found on vLLM server") - except Exception as e: - raise ValueError(f"Could not retrieve model name from vLLM server: {e}") + available_models = config.get("llm.available_models", {}) + enabled_models = config.get("llm.enabled_models", {}) + return {k: v for k, v in available_models.items() if k in enabled_models} \ No newline at end of file diff --git a/common/lib/llm/litellm_client.py b/common/lib/llm/litellm_client.py new file mode 100644 index 000000000..f6f7d59ff --- /dev/null +++ b/common/lib/llm/litellm_client.py @@ -0,0 +1,62 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. 
+ +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class LiteLLMClient(LLMProviderClient): + type = "litellm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model_name" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py new file mode 100644 index 000000000..b31968399 --- /dev/null +++ b/common/lib/llm/llm_client.py @@ -0,0 +1,190 @@ +""" +Centralized HTTP client for communicating with an LLM provider. 
+ +This class owns all direct HTTP calls to the provider's REST API and provides +shared static helpers for capability parsing, display-name formatting, and +building canonical llm.available_models entries. It is a plain helper with no +4CAT base-class dependency. +""" + +from abc import abstractmethod + +import requests + + +class LLMProviderClient: + _headers = {} + _meta = {} + + @staticmethod + def get_client(config, provider_config: dict) -> "LLMProviderClient": + """ + Get a client for an LLM provider + + Returns the appropriate sub-class depending on the provider type. + + :param config: 4CAT config reader + :param dict provider_config: Provider parameters, as configured in + 4CAT + :return LLMProviderClient: + """ + # in-line import because we otherwise get circular import shenanigans + from common.lib.llm.ollama_client import OllamaClient + from common.lib.llm.litellm_client import LiteLLMClient + from common.lib.llm.lmstudio_client import LMStudioClient + from common.lib.llm.thirdparty_client import ThirdPartyClient + + for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): + if client_type.type == provider_config["type"]: + return client_type(config, provider_config) + + raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}") + + def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None: + """ + HTTP client for an LLM Provider + + :param dict provider_config: Provider parameters, as configured in 4CAT + :param int timeout: Default request timeout in seconds. 
+ """ + self.config = config + + self._meta = provider_config + self.base_url = provider_config["url"].rstrip("/") + self.auth_type = provider_config.get("auth_header") + self.auth_key = provider_config.get("auth_key") + self.timeout = timeout + + self._session = requests.Session() + self._headers = {"Content-Type": "application/json"} + + if self.auth_type and self.auth_key: + self._headers[self.auth_type] = self.auth_key + + self.log = log + + def get_status(self) -> bool | int: + """ + Check if the server is reachable and responding to requests + + :return: `False` if the server is not responding, or an HTTP status code. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning( + f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}") + return False + + def list_models(self) -> list[dict]: + """List available models from the provider's server. + + :returns: List of model dicts, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get(self._models_info_key, []) + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}") + return [] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model.
+ + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + return { + "id": self.get_global_model_id(meta), + "local_id": self.get_model_id(meta), + "name": self.format_display_name(meta), + "model_card": self.get_model_card_url(meta), + "provider_type": self._meta["type"], + "provider": self._meta["url"], + "supported_media_types": self.parse_supported_media_types(meta), + "metadata": meta, + } + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return "" + + @abstractmethod + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). + """ + pass + + @abstractmethod + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param dict meta: Model metadata + :returns str: Human-readable display name string. + """ + pass + + def get_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This is the ID within the provider context, i.e. 
it is not guaranteed + to be globally unique (use `get_global_model_id()` instead). + + :param dict meta: Model metadata + :return str: Model ID + """ + return meta[self._model_id_key] + + def get_global_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This needs to be a *globally* unique ID, i.e. if multiple providers + provide the same model, the ID should still be unique per provider. + + :param dict meta: Model metadata + :return str: Model ID + """ + return "-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/llm/lmstudio_client.py b/common/lib/llm/lmstudio_client.py new file mode 100644 index 000000000..5ceb99c27 --- /dev/null +++ b/common/lib/llm/lmstudio_client.py @@ -0,0 +1,69 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class LMStudioClient(LLMProviderClient): + type = "lmstudio" + + _models_info_path = "/api/v1/models" + _models_info_key = "models" + _model_id_key = "key" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. 
+ Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("capabilities"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["capabilities"].get("vision"): + media_types.add("image") + + if meta.get("model_info", {}).get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param dict meta: Model metadata; ``display_name``, ``publisher`` and + ``params_string`` are used when present. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("display_name"): + model_name = meta["display_name"] + + extra_bits = [] + if "publisher" in meta: + extra_bits.append(meta["publisher"]) + + if "params_string" in meta: + extra_bits.append(meta["params_string"]) + + + model_name += f" {', '.join(extra_bits)}" if extra_bits else "" + + return model_name \ No newline at end of file diff --git a/common/lib/ollama_client.py b/common/lib/llm/ollama_client.py similarity index 50% rename from common/lib/ollama_client.py rename to common/lib/llm/ollama_client.py index 5d7bea1da..096249c1a 100644 --- a/common/lib/ollama_client.py +++ b/common/lib/llm/ollama_client.py @@ -9,138 +9,16 @@ import re import requests -from typing import Optional - - -class OllamaClient: - """ - HTTP client for an Ollama server. - - :param base_url: Base URL of the Ollama server (e.g. "http://localhost:11434"). - :param api_key: Optional API key for authentication. - :param auth_type: Header name to use for the API key (e.g. "Authorization"). - :param timeout: Default request timeout in seconds.
- """ - - def __init__(self, base_url: str, api_key: Optional[str] = None, - auth_type: Optional[str] = None, timeout: int = 10, log=None) -> None: - self.base_url = base_url.rstrip("/") - self.api_key = api_key - self.auth_type = auth_type - self.timeout = timeout - self._session = requests.Session() - self.log = log - - def _headers(self) -> dict: - """Build request headers, including auth if configured.""" - headers = {"Content-Type": "application/json"} - if self.api_key and self.auth_type: - headers[self.auth_type] = self.api_key - return headers - - def is_available(self) -> bool: - """Check if the Ollama server is reachable and responding to /api/tags.""" - try: - r = self._session.get( - f"{self.base_url}/api/tags", - headers=self._headers(), - timeout=self.timeout, - ) - if self.log and r.status_code != 200: - self.log.warning(f"OllamaClient: server responded with status code {r.status_code} during availability check: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: server is not available at {self.base_url}: {e}") - return False - - def list_models(self) -> list[dict]: - """List available models from the Ollama server. - - :returns: List of model dicts from ``/api/tags``, or ``[]`` on failure. - """ - try: - r = self._session.get( - f"{self.base_url}/api/tags", - headers=self._headers(), - timeout=self.timeout, - ) - if r.status_code == 200: - return r.json().get("models", []) - if self.log: - self.log.warning(f"OllamaClient: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to list models from {self.base_url}: {e}") - return [] - - def show_model(self, model_id: str) -> dict | None: - """Fetch full metadata for a model via ``POST /api/show``. - - :param model_id: Model name (e.g. ``"llama3:8b"``). 
- :returns: Parsed response dict, or ``None`` on failure. - """ - try: - r = self._session.post( - f"{self.base_url}/api/show", - headers=self._headers(), - json={"model": model_id}, - timeout=self.timeout, - ) - if r.status_code == 200: - return r.json() - if self.log: - self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}: {e}") - return None +from common.lib.llm.llm_client import LLMProviderClient - def pull_model(self, model_id: str, stream: bool = False) -> bool: - """Pull a model from the Ollama registry. +class OllamaClient(LLMProviderClient): + type = "ollama" - :param model_id: Model name (e.g. ``"llama3:8b"``). - :param stream: Whether to stream the response (default ``False``). - :returns: ``True`` on success, ``False`` on failure. - """ - try: - r = self._session.post( - f"{self.base_url}/api/pull", - headers=self._headers(), - json={"model": model_id, "stream": stream}, - timeout=600, - ) - if r.status_code != 200 and self.log: - self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}: {e}") - return False - - def delete_model(self, model_id: str) -> bool: - """Delete a model from the Ollama server. - - :param model_id: Model name (e.g. ``"llama3:8b"``). - :returns: ``True`` on success, ``False`` on failure. 
- """ - try: - r = self._session.delete( - f"{self.base_url}/api/delete", - headers=self._headers(), - json={"model": model_id}, - timeout=30, - ) - if r.status_code != 200 and self.log: - self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}: {e}") - return False + _models_info_path = "/api/tags" + _models_info_key = "models" + _model_id_key = "model" - @staticmethod - def parse_supported_media_types(meta: dict | None) -> list[str]: + def parse_supported_media_types(self, meta: dict) -> list[str]: """Derive the media types a model supports from its Ollama metadata. **Primary path**: reads ``meta["capabilities"]``: @@ -188,19 +66,15 @@ def parse_supported_media_types(meta: dict | None) -> list[str]: return media_types - @staticmethod - def format_display_name(model_id: str, meta: dict | None) -> str: - """Build a human-readable display name for a model. - - Logic is identical to the legacy ``OllamaManager._format_model_display_name`` - and has been moved here so it can be shared across OllamaManager and any - other caller without importing the worker class. + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. + :param dict meta: Model metadata + :returns str: Human-readable display name string. 
""" model_info = meta.get("model_info", {}) if meta else {} + model_id = self.get_global_model_id(meta) details = meta.get("details", {}) if meta else {} basename = None @@ -264,7 +138,6 @@ def _humanize(n): tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None - suffix = None if tag: tl = tag.lower() if tl in ("latest", "stable", "current"): @@ -289,41 +162,58 @@ def _humanize(n): return f"{basename} ({suffix})" - @staticmethod - def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> dict: - """Build a canonical ``llm.available_models`` entry for a model. + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model - :param model_id: Raw Ollama model identifier. - :param display_name: Human-readable name (from ``format_display_name``). - :param meta: ``/api/show`` response dict, or ``None`` if unavailable. - :returns: Dict ready to store under ``llm.available_models[model_id]``. + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) """ - has_meta = bool(meta) - return { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local", - "metadata_success": has_meta, - "model_info": meta.get("model_info", {}) if has_meta else {}, - "capabilities": meta.get("capabilities", []) if has_meta else [], - "details": meta.get("details", {}) if has_meta else {}, - "modified_at": meta.get("modified_at", None) if has_meta else None, - "supported_media_types": OllamaClient.parse_supported_media_types(meta), - } + return f"https://ollama.com/library/{meta['model']}" + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. 
+ """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers, + json={"model": model_id, "stream": stream}, + timeout=600, + ) - @classmethod - def from_config(cls, config, log=None) -> "OllamaClient": - """Instantiate an OllamaClient from 4CAT config. + if r.status_code != 200 and self.log: + self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. + return r.status_code == 200 + + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. - :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. - :param log: A logging instance for reporting issues. - :returns: Configured ``OllamaClient``. + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. 
""" - return cls( - base_url=config.get("llm.server", ""), - api_key=config.get("llm.api_key", "") or None, - auth_type=config.get("llm.auth_type", "") or None, - log=log, - ) + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers, + json={"model": model_id}, + timeout=30, + ) + if r.status_code != 200 and self.log: + self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False \ No newline at end of file diff --git a/common/lib/llm/thirdparty_client.py b/common/lib/llm/thirdparty_client.py new file mode 100644 index 000000000..e6ac5a29c --- /dev/null +++ b/common/lib/llm/thirdparty_client.py @@ -0,0 +1,56 @@ +""" +Fake 'client' to read from local store of known 3d party, API-based LLMs that +can be used with 4CAT +""" +import json + +from common.lib.llm.llm_client import LLMProviderClient + +class ThirdPartyClient(LLMProviderClient): + type = "api" + + _models_info_key = "models" + _model_id_key = "model" + + def get_status(self): + return 200 + + def list_models(self) -> dict: + with self.config.get("PATH_ROOT").joinpath("common/assets/llms.json").open() as infile: + models = json.load(infile) + + return models + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + return meta.get("supported_media_types", ["text"]) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. 
``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + return meta["name"] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + entry = super().build_model_entry(meta) + entry["provider"] = meta["provider"] + + return entry \ No newline at end of file diff --git a/common/lib/llm/vllm_client.py b/common/lib/llm/vllm_client.py new file mode 100644 index 000000000..512ce0e6f --- /dev/null +++ b/common/lib/llm/vllm_client.py @@ -0,0 +1,59 @@ +""" +Centralized HTTP client for communicating with a vLLM server. + +This class owns all direct HTTP calls to vLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class VLLMClient(LLMProviderClient): + type = "vllm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. 
+ Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 26d16e46e..605dbdc21 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -13,10 +13,12 @@ from jsonschema.exceptions import ValidationError, SchemaError from datetime import datetime, timedelta +from matplotlib.style.core import available + from common.lib.item_mapping import MappedItem from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict -from common.lib.llm import LLMAdapter +from common.lib.llm.adapter import LLMAdapter from backend.lib.processor import BasicProcessor class LLMPrompter(BasicProcessor): @@ -66,20 +68,12 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: @classmethod def get_options(cls, parent_dataset=None, 
config=None) -> dict: # Check if 4CAT wide LLM server is available - if config.get("llm.access", False) and config.get("llm.server", ""): - # Check some models enabled - shared_llm_enabled_models = config.get("llm.enabled_models", []) - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items() if model in shared_llm_enabled_models} - if not shared_llm_models: - shared_llm_name = False - shared_llm_default = "" - else: - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" - else: - shared_llm_name = False - shared_llm_default = "" - shared_llm_models = {} + available_models = config.get("llm.available_models", {}) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False @@ -94,13 +88,6 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: if parent_media_type in ("video", "audio"): # Ollama and LM Studio currently only support text and image hosted_and_local_available = False - - # Add additional sources for LLM Models - api_or_local_options = {"api": "API"} - if hosted_and_local_available: - api_or_local_options["local"] = "Local" - if shared_llm_name: - api_or_local_options["hosted"] = shared_llm_name options = { "ethics_warning1": { @@ -108,21 +95,14 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "Always test your prompt on a sample of rows, for instance by first using the " "Random filter processor.", }, - "api_or_local": { - "type": UserInput.OPTION_CHOICE, - "help": "Local or API", - "options": api_or_local_options, - "default": "api" if not 
shared_llm_name else "hosted", - "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid " - "and accessible URL through which the model can be reached.", - }, - "api_model": { + "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": LLMAdapter.get_model_options(config), + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, "default": "none", "tooltip": "Select from the predefined model list or insert manually", - "requires": "api_or_local==api", }, "api_key": { "type": UserInput.OPTION_TEXT, @@ -130,105 +110,9 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_or_local==api", + "requires": "model^=api", "sensitive": True, - }, - "api_custom_model_provider": { - "type": UserInput.OPTION_CHOICE, - "help": "Model provider", - "requires": "api_model==custom", - "options": LLMAdapter.get_model_providers(config), - "tooltip": "API provider. Currently limited to this list.", - }, - "api_custom_model_id": { - "type": UserInput.OPTION_TEXT, - "help": "Model ID", - "requires": "api_model==custom", - "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. " - "Fine-tuned models often require more info; OpenAI for instance requires the following " - "format: ft:[modelname]:[org_id]:[custom_suffix]:", - "default": "", - }, - "local_info": { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==local", - "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by " - "this 4CAT server, e.g. by running them on the same machine. 
For LM Studio and vLLM, " - "use the Base URL to interface with any OpenAI-like API endpoint.", - }, - "local_provider": { - "type": UserInput.OPTION_CHOICE, - "requires": "api_or_local==local", - "options": { - "none": "", - "lmstudio": "LM Studio", - "ollama": "Ollama", - "vllm": "vLLM", - }, - "default": "none", - "help": "Local LLM provider", - }, - "lmstudio-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==lmstudio", - "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local " - "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/" - "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top " - "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). " - "4CAT will use the top-most model you have loaded. ", - }, - "ollama-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "Ollama is a simple command-line application that lets you interface with a range of open-" - "source LLMs and that you can run as a local server. See [this link]" - "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.", - }, - "vllm-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux " - "systems capable of fast inference with a single LLM. Communication is done through an " - "OpenAI-like API endpoint. 
Just change the base URL below and insert an optional API key.", - }, - "local_base_url": { - "type": UserInput.OPTION_TEXT, - "requires": "api_or_local==local", - "default": "", - "help": "Base URL", - "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM " - "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).", - }, - "lmstudio_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LM Studio API key", - "tooltip": "[optional] Uses `lm-studio` by default.", - "requires": "local_provider==lmstudio", - "sensitive": True, - }, - "vllm_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "vLLM API key", - "tooltip": "[optional] Empty by default.", - "requires": "local_provider==vllm", - "sensitive": True, - }, - "ollama_model": { - "type": UserInput.OPTION_TEXT, - "requires": "local_provider==ollama", - "default": "", - "help": "Ollama model name", - "tooltip": "[required] for example 'llama3.2'", - }, - "hosted_llm_model": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM model", - "options": shared_llm_models, - "default": shared_llm_default, - "requires": "api_or_local==hosted", - }, + } } if is_media_parent: @@ -431,14 +315,8 @@ def is_compatible_with(cls, module=None, config=None): return False def process(self): - self.dataset.update_status("Validating settings") - api_model = self.parameters.get("api_model") - if api_model == "none": - api_model = "" - - modal_location = self.parameters.get("api_or_local", "api") hide_think = self.parameters.get("hide_think", False) # Check if the source dataset is a media archive (zip with images/video/audio) @@ -476,74 +354,22 @@ def process(self): base_url = None client_kwargs = {} - if modal_location == "local": - provider = self.parameters.get("local_provider", "") - base_url = 
self.parameters.get("local_base_url", "") + # load model and providermetadata + chosen_model_id = self.parameters.get("model") + available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} + if chosen_model_id not in available_models: + return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") - if not provider: - self.dataset.finish_with_error("Choose a local model provider") - return + model = available_models[chosen_model_id] - if provider == "lmstudio": - model = "lmstudio_model" - if not base_url: - base_url = "http://127.0.0.1:1234/v1" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:1234/v1" - if not self.parameters.get("lmstudio_api_key"): - api_key = "lm-studio" - elif provider == "ollama": - model = self.parameters.get("ollama_model", "") - if not model: - self.dataset.finish_with_error("You need to provide a model name for Ollama (e.g. 'llama3.2')") - return - if not base_url: - base_url = "http://localhost:11434" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:11434" - elif provider == "vllm": - model = "vllm_model" - api_key = self.parameters.get("vllm_api_key", "") - if not base_url: - base_url = "http://localhost:8000/v1" - else: - self.dataset.finish_with_error("Local provider not supported, choose either lmstudio or ollama") - return + if model["provider_type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") - elif modal_location == "hosted": - base_url = self.config.get("llm.server", "") - provider = self.config.get("llm.provider_type", "none").lower() - api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - model = self.parameters.get("hosted_llm_model", "") - if api_key and llm_auth_type: - client_kwargs = { - "headers": { - llm_auth_type: api_key - } - } - if provider == 
"none" or not base_url: - self.dataset.finish_with_error("4CAT LLM server not properly configured; contact the administrator") - return - else: - if not api_model: - self.dataset.finish_with_error("Select an API model or insert one manually") - return - # Models can be set manually - if api_model == "custom": - model = self.parameters.get("api_custom_model_id", "") - provider = self.parameters.get("api_custom_model_provider", "") - if not model: - self.dataset.finish_with_error("You must provide a valid API model name/ID") - return - if not provider: - self.dataset.finish_with_error("You must provide a valid API model provider") - return - else: - model_info = LLMAdapter.get_models(self.config).get(api_model, {}) - provider = model_info.get("provider") - model = api_model + available_providers = {p["url"]: p for p in self.config.get("llm.providers")} + if model["provider"] not in available_providers: + return self.dataset.finish_with_error(f"Model provider {model['provider']} unknown") - api_key = self.parameters.get("api_key") or self.config.get(f"api.{provider}.api_key", "") - if not api_key: - self.dataset.finish_with_error("You need to provide a valid API key") - return + provider = available_providers[model["provider"]] # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -589,14 +415,13 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") - base_url_str = "" if not base_url else f" at base URL '{base_url}'" - self.dataset.log(f"Using LLM provider '{provider}' with model '{model}'{base_url_str}") + base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" + self.dataset.log(f"Using LLM provider '{model['provider']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -794,7 +619,7 @@ def process(self): 
"prompt": prompt, "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": "", @@ -822,7 +647,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and ( + if model["provider"] == "google" and ( output_key.endswith(".signature") or output_key.endswith(".type") ): @@ -857,7 +682,7 @@ def process(self): self.dataset.update_progress(row / max_processed) # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) if limit_reached: @@ -972,10 +797,9 @@ def process(self): json_schema = self.get_json_schema_for_batch(n_batched, custom_schema=json_schema_original) # `llm` becomes a RunnableSequence when used, so we'll need to reset it here llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -990,7 +814,7 @@ def process(self): batch_str = f" and {n_batched} items batched into the prompt" if use_batches else "" self.dataset.update_status(f"Generating text at row {row:,}/" - f"{max_processed:,} with {model}{batch_str}") + f"{max_processed:,} with {model['name']}{batch_str}") # Now finally generate some text! 
try: response = llm.generate_text( @@ -1014,15 +838,9 @@ def process(self): self.dataset.finish_with_warning(outputs, f"Not all items processed: {e}") return - # Set model name from the response for more details - if hasattr(response, "response_metadata"): - model = response.response_metadata.get("model_name", model) - if "models/" in model: - model = model.replace("models/", "") - if not response: structured_warning = " with your specified JSON schema" if structured_output else "" - warning = f"{model} could not return text{structured_warning}. Consider editing your prompt or changing settings." + warning = f"{model['name']} could not return text{structured_warning}. Consider editing your prompt or changing settings." self.dataset.finish_with_warning(outputs, warning) return @@ -1106,7 +924,7 @@ def process(self): "prompt": prompt if not use_batches else base_prompt, # Insert dataset values if not batching "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": n + 1 if use_batches else "", @@ -1128,7 +946,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and output_key in ("extras.signature", ".type"): + if model["provider"] == "google" and output_key in ("extras.signature", ".type"): continue annotation = { @@ -1146,7 +964,7 @@ def process(self): n_batched = 0 # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) # Write annotations in batches @@ -1267,7 +1085,8 @@ def validate_query(query, request, config): :param config: :return: """ - if query["api_or_local"] == "api" and not query.get("api_key"): + is_external_api = query["model"].startswith("api-") + if is_external_api and not query.get("api_key"): raise 
QueryParametersException("You need to enter an API key when using third-party models.") # For media archive datasets, use_media won't be present in the query @@ -1283,7 +1102,7 @@ def validate_query(query, request, config): raise QueryParametersException("You need to insert column name(s) in the user prompt within brackets " "(e.g. '[body]' or '[timestamp, author]')") - if query["api_or_local"] == "api" and not query.get("frontend-confirm"): + if is_external_api and not query.get("frontend-confirm"): raise QueryNeedsExplicitConfirmationException("Your data will be sent to a third-party service for " "processing, which will share your data with them and is " "likely to incur costs. Do you want to continue?") diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 81b48ad55..02e41dec6 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -415,6 +415,18 @@ def _jinja2_filter_parameter_str(url): return params +@current_app.template_filter("hostname") +def _jinja2_filter_hostname(url: str) -> str: + """ + For a URL, return the hostname + + If no hostname is found, return the original value + + :param str url: + :return str: + """ + return ural.get_hostname(url) or url + @current_app.template_filter("explorer_css") def explorer_css(datasource, scope_class="explorer-content-container"): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index a4097952f..3e2445911 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -349,6 +349,17 @@ article.small .form-element select[multiple] { margin-left: 20px; } +.form-multi-option-wrapper .action-button:not(.hidden) { + position: absolute; + top: 0.5em; + left: 0.5em; + padding: 0 0.4em; +} + +.form-multi-option-wrapper .action-button.delete-button { + left: 2.5em; +} + .form-multi-option-wrapper li::before { content: ''; position: absolute; @@ -472,6 +483,7 @@ h2 .inline-search input, h3 .inline-search 
input { overflow: hidden; box-sizing: border-box; border-width: 0; + padding: 0; } .ellipsis { font-weight: bold; diff --git a/webtool/static/js/modules/multi-form.js b/webtool/static/js/modules/multi-form.js index 389f5d9a8..9ca67bd93 100644 --- a/webtool/static/js/modules/multi-form.js +++ b/webtool/static/js/modules/multi-form.js @@ -6,12 +6,12 @@ export const multiForm = { actions.className = 'multi-form-actions'; const add_button = document.createElement('button'); - add_button.className = 'add-button'; + add_button.className = 'add-button action-button'; add_button.textContent = '+'; add_button.addEventListener('click', multiForm.add_item); const delete_button = document.createElement('button'); - delete_button.className = 'delete-button'; + delete_button.className = 'delete-button action-button'; delete_button.textContent = 'x'; delete_button.addEventListener('click', multiForm.delete_item); @@ -23,8 +23,10 @@ export const multiForm = { el.querySelectorAll('li').forEach(function (el) { const el_actions = actions.cloneNode(true); el.appendChild(el_actions); - }) - }) + }); + multiForm.renumber(el); + }); + }, handle_click: function (e) { @@ -53,7 +55,6 @@ export const multiForm = { if(!confirm("Are you sure?")){ return false; } - const li = find_parent(e.target, 'li'); const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); @@ -69,9 +70,11 @@ export const multiForm = { let index = 1; parent.querySelectorAll('li').forEach(function (el) { el.setAttribute('data-multi-option-index', index); + el.querySelector('.delete-button').classList.remove('hidden'); multiForm.renumber_items(el, index); index += 1; }) + parent.querySelector('li:last-child .delete-button').classList.add('hidden'); }, renumber_items: function(parent, index) { @@ -81,7 +84,6 @@ export const multiForm = { return; } for(const attribute of attributes) { - console.log(child); if(child.hasAttribute(attribute)) { child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9+]-/, 
`-${index}-`)); } diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 22a5573dd..240ebae69 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -21,30 +21,37 @@

LLM Server

- - + + + {% if not providers %} - - + + {% else %} + {% for provider in providers %} - + + {% endfor %} + {% endif %}
SettingValueServerStatus
Server URL{% if llm_server %}{{ llm_server }}{% else %}Not configured{% endif %} + No LLM providers configured. You can add and configure servers via the 'LLM' tab on the Settings page. +
Status{{ provider.name }} - {% if server_status == "online" %} + {% if provider.status == "online" %} Online - {% elif server_status == "not configured" %} + {% elif provider.status == "not configured" %} Not configured {% else %} - {{ server_status }} + {{ provider.status }} {% endif %}
{# Available models #} -

Available Models +

+ Available Models
@@ -57,10 +64,11 @@

Available Models + - Model + Provider/model Display name Capabilities Status @@ -70,7 +78,8 @@

Available Models {% for model_id, model in available_models.items() %} - {{ model_id }} + {{ model.provider_type }}/{{ model.provider|hostname }} + {{ model.local_id }} {% if model.model_card %} @@ -80,7 +89,7 @@

Available Models {% endif %} - {{ model.capabilities | join(", ") }} + {{ model.supported_media_types | join(", ") }} {% if model_id in enabled_models %} @@ -103,6 +112,7 @@

Available Models {% endif %} + {% if model.provider_type == "ollama" %}
@@ -110,16 +120,17 @@

Available Models Delete + {% endif %} {% endfor %} {% else %} - - {% if llm_server %} + + {% if providers %} No models found. Use the Refresh button to fetch available models, or pull a new model below. {% else %} - Configure the LLM server URL in Settings first. + No LLM providers configured. {% endif %} @@ -128,15 +139,28 @@

Available Models

- {# Pull a new model #} - {% if llm_server %} + {# Pull a new model, if an ollama server is configured #} + {% if providers|selectattr("type", "equalto", "ollama")|list %}
-

Pull Model

-

Enter an Ollama model name (e.g. llama3:8b) to pull it from the - Ollama library. - Pulling large models may take several minutes; the job runs in the background.

+

Install new LLMs

+

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For + Ollama, model names can be found in + model library.

+

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install + models for all LLM providers; if your provider is not listed below, it may not be able to add additional + models to it, or you may need to use an external tool to add new models.

+
+ + +
diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 41f9df472..7f32117c1 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -9,6 +9,7 @@ from flask_login import login_required from webtool.lib.helpers import setting_required, error +from common.lib.llm.llm_client import LLMProviderClient component = Blueprint("llm", __name__) @@ -21,26 +22,30 @@ def llm_panel(): LLM Server management panel Shows server status, available models, and controls to pull/delete/refresh - models. Pull, delete, and refresh operations are queued as OllamaManager + models. Pull, delete, and refresh operations are queued as LLMProviderManager jobs rather than run synchronously. """ if not g.config.get("llm.access"): return error(403, message="LLM access is not enabled on this server.") + providers = g.config.get("llm.providers", []) + if request.method == "POST": action = request.form.get("action", "").strip() + provider = request.form.get("provider", "").strip() + details = {"provider": provider} if provider else {} if action == "refresh": # Queue a one-time manual refresh job; use a timestamp-based remote_id # so it is always accepted even if a periodic job already exists. 
- g.queue.add_job("manage-ollama", details={"task": "refresh"}, - remote_id=f"manage-ollama-manual-{int(time.time())}") + g.queue.add_job("manage-llm", details={**details, "task": "refresh"}, + remote_id=f"manage-llm-manual-{int(time.time())}") flash("Model refresh job queued.") elif action == "pull": model_name = request.form.get("model_name", "").strip() if model_name: - g.queue.add_job("manage-ollama", details={"task": "pull"}, remote_id=model_name) + g.queue.add_job("manage-llm", details={**details, "task": "pull"}, remote_id=model_name) flash(f"Pull job queued for model '{model_name}'.") else: flash("Please provide a model name to pull.") @@ -48,7 +53,7 @@ def llm_panel(): elif action == "delete": model_name = request.form.get("model_name", "").strip() if model_name: - g.queue.add_job("manage-ollama", details={"task": "delete"}, remote_id=model_name) + g.queue.add_job("manage-llm", details={**details, "task": "delete"}, remote_id=model_name) flash(f"Delete job queued for model '{model_name}'.") elif action == "enable": @@ -73,23 +78,15 @@ def llm_panel(): # --- GET: render panel --- - llm_server = g.config.get("llm.server", "") - server_status = "not configured" + for i, provider in enumerate(providers): + client = LLMProviderClient.get_client(g.config, provider) - if llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = g.config.get("llm.api_key", "") - llm_auth_type = g.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key + if provider_status := client.get_status(): + server_status = "online" if provider_status == 200 else f"error (HTTP {provider_status})" + else: + server_status = "unreachable" - try: - resp = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=5) - server_status = "online" if resp.status_code == 200 else f"error (HTTP {resp.status_code})" - except requests.Timeout: - server_status = "unreachable (timeout)" - except requests.RequestException as e: - 
server_status = f"unreachable ({e})" + providers[i]["status"] = server_status available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) @@ -97,8 +94,7 @@ def llm_panel(): return render_template( "controlpanel/llm-server.html", flashes=get_flashed_messages(), - llm_server=llm_server, - server_status=server_status, + providers=providers, available_models=available_models, enabled_models=enabled_models, ) From a6ecbc21afd54590b89178db356977ef92e0da04 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:02:29 +0200 Subject: [PATCH 16/53] Formatting --- common/lib/config_definition.py | 13 ++++++++-- common/lib/llm/adapter.py | 24 +++++++++---------- common/lib/llm/clients/__init__.py | 0 .../lib/llm/{ => clients}/litellm_client.py | 2 -- .../lib/llm/{ => clients}/lmstudio_client.py | 6 ++--- common/lib/llm/{ => clients}/ollama_client.py | 15 ++++++++---- .../llm/{ => clients}/thirdparty_client.py | 3 ++- common/lib/llm/{ => clients}/vllm_client.py | 5 ++-- common/lib/llm/llm_client.py | 8 +++---- 9 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 common/lib/llm/clients/__init__.py rename common/lib/llm/{ => clients}/litellm_client.py (99%) rename common/lib/llm/{ => clients}/lmstudio_client.py (98%) rename common/lib/llm/{ => clients}/ollama_client.py (91%) rename common/lib/llm/{ => clients}/thirdparty_client.py (98%) rename common/lib/llm/{ => clients}/vllm_client.py (97%) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index e5c2a726f..c0ad5f990 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -581,7 +581,15 @@ }, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, - "default": [], + "default": [ + { + "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "" + } + ], "global": True, "help": "LLM 
providers", "options": { @@ -598,7 +606,8 @@ "options": { "ollama": "Ollama", "litellm": "LiteLLM", - "api": "Third-party models via APIs (OpenAI, Mistral, etc)", + "lmstudio": "LM Studio", + "vllm": "vLLM", "none": "None", }, }, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 5a3ece227..0c372f30d 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -5,7 +5,6 @@ from pathlib import Path from typing import List, Optional, Union -from langchain_community.chat_models import ChatLiteLLM from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -141,7 +140,8 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model["local_id"]: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[ + "local_id"]: kwargs = {} try: @@ -152,10 +152,10 @@ def generate_text( return response def create_multimodal_content( - self, - text: str, - media_urls: Optional[List[str]] = None, - media_files: Optional[List[Union[str, Path]]] = None, + self, + text: str, + media_urls: Optional[List[str]] = None, + media_files: Optional[List[Union[str, Path]]] = None, ) -> List[dict]: """ Create multimodal content structure for LangChain messages with media URLs @@ -204,11 +204,11 @@ def create_multimodal_content( return content def _format_media_block( - self, - url: Optional[str] = None, - b64_data: Optional[str] = None, - mime_type: str = "image/jpeg", - media_category: str = "image", + self, + url: Optional[str] = None, + b64_data: Optional[str] = None, + mime_type: str = "image/jpeg", + media_category: str = "image", ) -> dict: """ Format a single media block for the appropriate provider. 
@@ -294,4 +294,4 @@ def get_models(config) -> dict: """ available_models = config.get("llm.available_models", {}) enabled_models = config.get("llm.enabled_models", {}) - return {k: v for k, v in available_models.items() if k in enabled_models} \ No newline at end of file + return {k: v for k, v in available_models.items() if k in enabled_models} diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm/litellm_client.py b/common/lib/llm/clients/litellm_client.py similarity index 99% rename from common/lib/llm/litellm_client.py rename to common/lib/llm/clients/litellm_client.py index f6f7d59ff..0da9dc682 100644 --- a/common/lib/llm/litellm_client.py +++ b/common/lib/llm/clients/litellm_client.py @@ -10,8 +10,6 @@ LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface with the OpenAI API as well. """ -import requests - from common.lib.llm.llm_client import LLMProviderClient class LiteLLMClient(LLMProviderClient): diff --git a/common/lib/llm/lmstudio_client.py b/common/lib/llm/clients/lmstudio_client.py similarity index 98% rename from common/lib/llm/lmstudio_client.py rename to common/lib/llm/clients/lmstudio_client.py index 5ceb99c27..b7b0906dc 100644 --- a/common/lib/llm/lmstudio_client.py +++ b/common/lib/llm/clients/lmstudio_client.py @@ -10,10 +10,9 @@ LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface with the OpenAI API as well. 
""" -import requests - from common.lib.llm.llm_client import LLMProviderClient + class LMStudioClient(LLMProviderClient): type = "lmstudio" @@ -63,7 +62,6 @@ def format_display_name(self, meta: dict) -> str: if "params_string" in meta: extra_bits.append(meta["params_string"]) - model_name += f" {', '.join(extra_bits)}" - return model_name \ No newline at end of file + return model_name diff --git a/common/lib/llm/ollama_client.py b/common/lib/llm/clients/ollama_client.py similarity index 91% rename from common/lib/llm/ollama_client.py rename to common/lib/llm/clients/ollama_client.py index 096249c1a..3064c1173 100644 --- a/common/lib/llm/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -11,6 +11,7 @@ from common.lib.llm.llm_client import LLMProviderClient + class OllamaClient(LLMProviderClient): type = "ollama" @@ -187,13 +188,15 @@ def pull_model(self, model_id: str, stream: bool = False) -> bool: ) if r.status_code != 200 and self.log: - self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 except requests.RequestException as e: if self.log: - self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") return False @@ -211,9 +214,11 @@ def delete_model(self, model_id: str) -> bool: timeout=30, ) if r.status_code != 200 and self.log: - self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code 
== 200 except requests.RequestException as e: if self.log: - self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") - return False \ No newline at end of file + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False diff --git a/common/lib/llm/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py similarity index 98% rename from common/lib/llm/thirdparty_client.py rename to common/lib/llm/clients/thirdparty_client.py index e6ac5a29c..8d2f48602 100644 --- a/common/lib/llm/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -6,6 +6,7 @@ from common.lib.llm.llm_client import LLMProviderClient + class ThirdPartyClient(LLMProviderClient): type = "api" @@ -53,4 +54,4 @@ def build_model_entry(self, meta: dict) -> dict: entry = super().build_model_entry(meta) entry["provider"] = meta["provider"] - return entry \ No newline at end of file + return entry diff --git a/common/lib/llm/vllm_client.py b/common/lib/llm/clients/vllm_client.py similarity index 97% rename from common/lib/llm/vllm_client.py rename to common/lib/llm/clients/vllm_client.py index 512ce0e6f..3c218d3af 100644 --- a/common/lib/llm/vllm_client.py +++ b/common/lib/llm/clients/vllm_client.py @@ -7,10 +7,9 @@ base-class dependency. 
""" -import requests - from common.lib.llm.llm_client import LLMProviderClient + class VLLMClient(LLMProviderClient): type = "vllm" @@ -56,4 +55,4 @@ def format_display_name(self, meta: dict) -> str: if meta["litellm_params"].get("model"): model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) - return model_name \ No newline at end of file + return model_name diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index b31968399..08485fc75 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -29,10 +29,10 @@ def get_client(config, provider_config: dict) -> "LLMProviderClient": :return LLMProviderClient: """ # in-line import because we otherwise get circular import shenanigans - from common.lib.llm.ollama_client import OllamaClient - from common.lib.llm.litellm_client import LiteLLMClient - from common.lib.llm.lmstudio_client import LMStudioClient - from common.lib.llm.thirdparty_client import ThirdPartyClient + from common.lib.llm.clients.ollama_client import OllamaClient + from common.lib.llm.clients.litellm_client import LiteLLMClient + from common.lib.llm.clients.lmstudio_client import LMStudioClient + from common.lib.llm.clients.thirdparty_client import ThirdPartyClient for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): if client_type.type == provider_config["type"]: From b9b3d0a61409ee1c5f84c4d2c9b137427d00d558 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:04:26 +0200 Subject: [PATCH 17/53] ruff --- backend/workers/llm_manager.py | 2 -- processors/machine_learning/llm_prompter.py | 3 --- webtool/views/views_llm.py | 2 -- 3 files changed, 7 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 3b279de36..398b835ca 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -4,8 +4,6 @@ from backend.lib.worker import BasicWorker from common.lib.llm.llm_client import 
LLMProviderClient -import json - class LLMProviderManager(BasicWorker): """ Manages LLM models diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 605dbdc21..1ffe2cf98 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -13,8 +13,6 @@ from jsonschema.exceptions import ValidationError, SchemaError from datetime import datetime, timedelta -from matplotlib.style.core import available - from common.lib.item_mapping import MappedItem from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict @@ -351,7 +349,6 @@ def process(self): # Set all variables through which we can reach the LLM api_key = "" - base_url = None client_kwargs = {} # load model and providermetadata diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 7f32117c1..863d7e9fe 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -3,8 +3,6 @@ """ import time -import requests - from flask import Blueprint, render_template, flash, get_flashed_messages, redirect, url_for, request, g from flask_login import login_required From bdf07e2d6449b58b3d4f239b1d8d03588ae1d0e1 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:56:09 +0200 Subject: [PATCH 18/53] Reshuffle OpenAI-related clients --- common/lib/config_definition.py | 6 +- common/lib/llm/adapter.py | 13 +- common/lib/llm/clients/__init__.py | 1 + .../{lmstudio_client.py => openai_client.py} | 24 +- common/lib/llm/clients/vllm_client.py | 58 --- common/lib/llm/llm_client.py | 10 +- processors/machine_learning/llm_prompter.py | 338 +++++++++--------- 7 files changed, 197 insertions(+), 253 deletions(-) rename common/lib/llm/clients/{lmstudio_client.py => openai_client.py} (74%) delete mode 100644 
common/lib/llm/clients/vllm_client.py diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index c0ad5f990..7cf0c6483 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -595,7 +595,7 @@ "options": { "name": { "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", + "default": "", "help": "Name of LLM Server in UI", "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", }, @@ -606,8 +606,8 @@ "options": { "ollama": "Ollama", "litellm": "LiteLLM", - "lmstudio": "LM Studio", - "vllm": "vLLM", + "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)", + "api": "Third-party models from OpenAI, Anthropic, Mistral, etc", "none": "None", }, }, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 0c372f30d..9fe80eb49 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -61,7 +61,6 @@ def _load_llm(self) -> BaseChatModel: if self.provider["type"] == "openai": if "o3" in self.model: del chat_params["temperature"] - chat_params["base_url"] = self.provider["url"] or "https://api.openai.com/v1" adapter_class = ChatOpenAI elif self.provider["type"] == "google": @@ -82,8 +81,11 @@ def _load_llm(self) -> BaseChatModel: adapter_class = ChatOllama chat_params.update({"client_kwargs": self.client_kwargs}) - elif self.provider["type"] == "litellm": - adapter_class = ChatOpenAI + elif self.provider["type"] in {"litellm", "openai-like"}: + url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url'] + url += "v1/" if not url.endswith("v1/") else "" + + chat_params.update({"base_url": url}) if self.provider["auth_header"]: chat_params.update({ "default_headers": { @@ -91,11 +93,6 @@ def _load_llm(self) -> BaseChatModel: } }) - elif self.provider["type"] in {"vllm", "lmstudio", "litellm"}: - # OpenAI-compatible local servers - if self.provider == "lmstudio" and not 
self.api_key: - self.api_key = "lm-studio" - adapter_class = ChatOpenAI else: diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py index e69de29bb..4287ca861 100644 --- a/common/lib/llm/clients/__init__.py +++ b/common/lib/llm/clients/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/common/lib/llm/clients/lmstudio_client.py b/common/lib/llm/clients/openai_client.py similarity index 74% rename from common/lib/llm/clients/lmstudio_client.py rename to common/lib/llm/clients/openai_client.py index b7b0906dc..fa04446c0 100644 --- a/common/lib/llm/clients/lmstudio_client.py +++ b/common/lib/llm/clients/openai_client.py @@ -1,20 +1,16 @@ """ -Centralized HTTP client for communicating with a LiteLLM server. +Centralized HTTP client for communicating with an OpenAI compatible server. -This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +This class owns all direct HTTP calls to an OpenAI style REST API and provides shared static helpers for capability parsing, display-name formatting, and building canonical llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. - -This class is primarily intended for interfacing with LiteLLM, but since -LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface -with the OpenAI API as well. """ from common.lib.llm.llm_client import LLMProviderClient class LMStudioClient(LLMProviderClient): - type = "lmstudio" + type = "openai-like" _models_info_path = "/api/v1/models" _models_info_key = "models" @@ -28,16 +24,14 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: :returns: Ordered list of supported media type strings. 
Returns ``[]`` when ``meta`` is ``None`` """ + media_types = {"text"} # far as I can tell, text is always supported + if meta is None or not meta.get("capabilities"): - return [] + return list(media_types) - media_types = {"text"} # far as I can tell, text is always supported if meta["capabilities"].get("vision"): media_types.add("image") - if meta["model_info"].get("supports_audio_input"): - media_types.add("sound") - # no way to tell if model supports embeddings input as far as I can see... return list(media_types) @@ -56,12 +50,12 @@ def format_display_name(self, meta: dict) -> str: model_name = meta["display_name"] extra_bits = [] - if "publisher" in meta: + if meta.get("publisher"): extra_bits.append(meta["publisher"]) - if "params_string" in meta: + if meta.get("params_string"): extra_bits.append(meta["params_string"]) - model_name += f" {', '.join(extra_bits)}" + model_name += f" ({', '.join(extra_bits)})" return model_name diff --git a/common/lib/llm/clients/vllm_client.py b/common/lib/llm/clients/vllm_client.py deleted file mode 100644 index 3c218d3af..000000000 --- a/common/lib/llm/clients/vllm_client.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Centralized HTTP client for communicating with a vLLM server. - -This class owns all direct HTTP calls to vLLM's REST API and provides shared -static helpers for capability parsing, display-name formatting, and building -canonical llm.available_models entries. It is a plain helper with no 4CAT -base-class dependency. - -""" -from common.lib.llm.llm_client import LLMProviderClient - - -class VLLMClient(LLMProviderClient): - type = "vllm" - - _models_info_path = "/model/info" - _models_info_key = "data" - _model_id_key = "model" - - def parse_supported_media_types(self, meta: dict) -> list[str]: - """ - Derive the media types a model supports from its LiteLLM metadata. - - :param meta: ``model info`` response dict, or ``None``. - :returns: Ordered list of supported media type strings. 
- Returns ``[]`` when ``meta`` is ``None`` - """ - if meta is None or not meta.get("model_info"): - return [] - - media_types = {"text"} # far as I can tell, text is always supported - if meta["model_info"].get("supports_vision"): - media_types.add("image") - - if meta["model_info"].get("supports_audio_input"): - media_types.add("sound") - - # no way to tell if model supports embeddings input as far as I can see... - - return list(media_types) - - def format_display_name(self, meta: dict) -> str: - """ - Build a human-readable display name for a model. - - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. - """ - model_name = self.get_global_model_id(meta) - - if meta.get("model_name"): - model_name = meta["model_name"] - - if meta["litellm_params"].get("model"): - model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) - - return model_name diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index 08485fc75..b59d38ab0 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -31,7 +31,7 @@ def get_client(config, provider_config: dict) -> "LLMProviderClient": # in-line import because we otherwise get circular import shenanigans from common.lib.llm.clients.ollama_client import OllamaClient from common.lib.llm.clients.litellm_client import LiteLLMClient - from common.lib.llm.clients.lmstudio_client import LMStudioClient + from common.lib.llm.clients.openai_client import LMStudioClient from common.lib.llm.clients.thirdparty_client import ThirdPartyClient for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): @@ -51,11 +51,17 @@ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) - self.config = config self._meta = provider_config - self.base_url = provider_config["url"].rstrip("/") + + self.timeout = timeout self.auth_type = 
provider_config.get("auth_header") self.auth_key = provider_config.get("auth_key") self.timeout = timeout + self.base_url = provider_config["url"].rstrip("/") + if self.base_url.endswith("v1"): + # get rid of the 'v1' - we'll add this in the path + self.base_url = f"{self.base_url[:-2]}" + self._session = requests.Session() self._headers = {"Content-Type": "application/json"} diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 1ffe2cf98..9f174f148 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -76,16 +76,12 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False media_type = "media" - hosted_and_local_available = True if parent_dataset: parent_extension = parent_dataset.get_extension() parent_media_type = parent_dataset.get_media_type() if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"): is_media_parent = True media_type = parent_media_type - if parent_media_type in ("video", "audio"): - # Ollama and LM Studio currently only support text and image - hosted_and_local_available = False options = { "ethics_warning1": { @@ -115,179 +111,187 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: if is_media_parent: # Media-specific options: show info about media files being attached - options["media_info"] = { - "type": UserInput.OPTION_INFO, - "help": f"The parent dataset contains {media_type} files that will be sent " - f"to the LLM with each prompt. Make sure to use a model that supports " - f"{media_type} input (e.g. vision models for images).
" - f"Not all models support all media types. If the model cannot process " - f"{media_type} files, an error will be returned during processing.", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": f"Describe what the model should do with each {media_type} file. " - f"No column brackets needed — {media_type} files are attached automatically.", - "default": "", - } + options.update({ + "media_info": { + "type": UserInput.OPTION_INFO, + "help": f"The parent dataset contains {media_type} files that will be sent " + f"to the LLM with each prompt. Make sure to use a model that supports " + f"{media_type} input (e.g. vision models for images).
" + f"Not all models support all media types. If the model cannot process " + f"{media_type} files, an error will be returned during processing.", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": f"Describe what the model should do with each {media_type} file. " + f"No column brackets needed — {media_type} files are attached automatically.", + "default": "", + } + }) + else: - # Text-based dataset options: column brackets, media URL toggle, batching - options["prompt_info"] = { + options.update({ + # Text-based dataset options: column brackets, media URL toggle, batching + "prompt_info": { + "type": UserInput.OPTION_INFO, + "help": "How to prompt
" + "Use `[brackets]` with column names to insert dataset items in the prompt. You " + "can place column brackets in different parts of the prompt or use multiple column names within" + ' a single column bracket to merge items.
Example 1: "Describe the topic ' + 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' + "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " + 'to the following text: `[body]`"
Prompting is a delicate art. See ' + "processor references on best prompting practices.
For predefined research prompts, see " + "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " + "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" + "library).", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": "Use [brackets] with columns names.", + "default": "", + }, + "use_media": { + "type": UserInput.OPTION_TOGGLE, + "help": "Add images", + "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.", + "default": False, + }, + "media_columns": { + "type": UserInput.OPTION_TEXT, + "help": "Columns with image URL(s)", + "default": "", + "inline": True, + "tooltip": "Multiple columns can be selected.", + "requires": "use_media==true", + } + }) + + # Common options for both text and media datasets + options.update({ + "structured_output": { + "type": UserInput.OPTION_TOGGLE, + "help": "Output structured JSON", + "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " + "structured output.", + "default": False, + }, + "json_schema_info": { "type": UserInput.OPTION_INFO, - "help": "How to prompt
" - "Use `[brackets]` with column names to insert dataset items in the prompt. You " - "can place column brackets in different parts of the prompt or use multiple column names within" - ' a single column bracket to merge items.
Example 1: "Describe the topic ' - 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' - "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " - 'to the following text: `[body]`"
Prompting is a delicate art. See ' - "processor references on best prompting practices.
For predefined research prompts, see " - "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " - "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" - "library).", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { + "help": "Insert a JSON Schema for structured outputs. These define the output that " + "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" + "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" + "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", + "requires": "structured_output==true", + }, + "json_schema": { "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": "Use [brackets] with columns names.", + "help": "JSON schema", + "tooltip": "[required] A JSON schema that the structured output will adhere to", + "requires": "structured_output==true", "default": "", - } - options["use_media"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Add images", - "tooltip": "Add media URLs for multi-modal processing. 
Requires a model that supports vision.", - "default": False, - } - options["media_columns"] = { + }, + "temperature": { "type": UserInput.OPTION_TEXT, - "help": "Columns with image URL(s)", - "default": "", - "inline": True, - "tooltip": "Multiple columns can be selected.", - "requires": "use_media==true", + "help": "Temperature", + "default": 0.1, + "coerce_type": float, + "max": 2.0, + "tooltip": "Temperature indicates how strict the model will gravitate towards the most " + "probable next token. A score close to 0 returns more predictable " + "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", } - - # Common options for both text and media datasets - options["structured_output"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Output structured JSON", - "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " - "structured output.", - "default": False, - } - options["json_schema_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Insert a JSON Schema for structured outputs. These define the output that " - "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" - "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" - "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", - "requires": "structured_output==true", - } - options["json_schema"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "JSON schema", - "tooltip": "[required] A JSON schema that the structured output will adhere to", - "requires": "structured_output==true", - "default": "", - } - options["temperature"] = { - "type": UserInput.OPTION_TEXT, - "help": "Temperature", - "default": 0.1, - "coerce_type": float, - "max": 2.0, - "tooltip": "Temperature indicates how strict the model will gravitate towards the most " - "probable next token. 
A score close to 0 returns more predictable " - "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", - } + }) if not is_media_parent: - options["truncate_input"] = { + options.update({ + "truncate_input": { + "type": UserInput.OPTION_TEXT, + "help": "Max chars in input value", + "default": 0, + "coerce_type": int, + "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", + "requires": "use_media==false", + }, + "max_tokens": { + "type": UserInput.OPTION_TEXT, + "help": "Max output tokens", + "default": 10000, + "coerce_type": int, + "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " + "text for common English text. This includes tokens spent for reasoning.", + }, + "batches": { + "type": UserInput.OPTION_TEXT, + "help": "Items per prompt", + "coerce_type": int, + "default": 1, + "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " + "wherever the column brackets are used (e.g. '[body]').", + "requires": "use_media==false", + }, + "batch_info": { + "type": UserInput.OPTION_INFO, + "help": "Note on batching: Batching may increase speed but reduce accuracy. Models " + "need to support structured output for batching. This processor uses JSON schemas to ensure " + "symmetry between input and output lengths, but models may struggle to match input and output " + "values. Describe the dataset values in plurals in your prompt when batching. 
If you use " + "multiple column brackets in your prompt, rows with any empty values are skipped.", + "requires": "use_media==false", + } + }) + + options.update({ + "ethics_warning3": { + "type": UserInput.OPTION_INFO, + "requires": "api_or_local==api", + "help": "When using LLMs through commercial parties, always consider anonymising your data and " + "whether local open-source LLMs are also an option.", + }, + "save_annotations": { + "type": UserInput.OPTION_ANNOTATION, + "label": "prompt outputs", + "default": False, + }, + "hide_think": { + "type": UserInput.OPTION_TOGGLE, + "help": "Hide reasoning", + "default": False, + "tooltip": "Some models include reasoning in their output, between tags. This option " + "removes this tag and its contents from the output.", + }, + "limit": { "type": UserInput.OPTION_TEXT, - "help": "Max chars in input value", + "help": "Only annotate this many items, then stop", "default": 0, "coerce_type": int, - "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", - "requires": "use_media==false", - } - - options["max_tokens"] = { - "type": UserInput.OPTION_TEXT, - "help": "Max output tokens", - "default": 10000, - "coerce_type": int, - "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " - "text for common English text. This includes tokens spent for reasoning.", - } - - if not is_media_parent: - options["batches"] = { + "min": 0, + "delegated": True, + }, + "annotation_label": { "type": UserInput.OPTION_TEXT, - "help": "Items per prompt", - "coerce_type": int, - "default": 1, - "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " - "wherever the column brackets are used (e.g. '[body]').", - "requires": "use_media==false", - } - options["batch_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Note on batching: Batching may increase speed but reduce accuracy. 
Models " - "need to support structured output for batching. This processor uses JSON schemas to ensure " - "symmetry between input and output lengths, but models may struggle to match input and output " - "values. Describe the dataset values in plurals in your prompt when batching. If you use " - "multiple column brackets in your prompt, rows with any empty values are skipped.", - "requires": "use_media==false", + "help": "Label for the annotations to add to the dataset", + "default": "", + "delegated": True, } - - options["ethics_warning3"] = { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", - "help": "When using LLMs through commercial parties, always consider anonymising your data and " - "whether local open-source LLMs are also an option.", - } - options["save_annotations"] = { - "type": UserInput.OPTION_ANNOTATION, - "label": "prompt outputs", - "default": False, - } - options["hide_think"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Hide reasoning", - "default": False, - "tooltip": "Some models include reasoning in their output, between tags. 
This option " - "removes this tag and its contents from the output.", - } - options["limit"] = { - "type": UserInput.OPTION_TEXT, - "help": "Only annotate this many items, then stop", - "default": 0, - "coerce_type": int, - "min": 0, - "delegated": True, - } - options["annotation_label"] = { - "type": UserInput.OPTION_TEXT, - "help": "Label for the annotations to add to the dataset", - "default": "", - "delegated": True, - } + }) # Get the media columns for the select media columns option if not is_media_parent and parent_dataset and parent_dataset.get_columns(): @@ -351,7 +355,7 @@ def process(self): api_key = "" client_kwargs = {} - # load model and providermetadata + # load model and provider metadata chosen_model_id = self.parameters.get("model") available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} if chosen_model_id not in available_models: @@ -413,7 +417,7 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" - self.dataset.log(f"Using LLM provider '{model['provider']}' with model '{model}'{base_url_str}") + self.dataset.log(f"Using LLM provider '{model['provider_type'] if provider['url'] else provider['provider']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( config=self.config, @@ -996,7 +1000,7 @@ def process(self): # Final outputs time_end = time.time() time_progressed = str(timedelta(seconds=int(time_end - time_start))) - final_status = f"Finished, {model} generated text in {time_progressed}." + final_status = f"Finished, {model['local_id']} generated text in {time_progressed}." skipped_str = None if not skipped else f" Skipped {skipped} rows because of empty values." 
if skipped_str: self.dataset.finish_with_warning(i, final_status + skipped_str) From d86a3092a2568384c3e95a553383be33f4a2bfa7 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:02:43 +0200 Subject: [PATCH 19/53] Control panel text & names --- common/lib/config_definition.py | 8 +++++--- webtool/templates/controlpanel/layout.html | 2 +- webtool/templates/controlpanel/llm-server.html | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 7cf0c6483..0078fa681 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -575,9 +575,11 @@ # allows 4CAT LLM processors to connect to a local or remote LLM server "llm.intro": { "type": UserInput.OPTION_INFO, - "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect " - "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " - "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." + "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as " + "connect to local or remote LLM servers. You can also set up your own LLM server using open source " + "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for " + "your users. After configuring providers you can enable and disable available models via the 'LLMs & " + "Providers' page in the Control Panel." 
}, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index 400a33846..26d03df20 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -19,7 +19,7 @@ {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} - LLM Server{% endif %} + LLMs & Providers{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 240ebae69..4b4285dd8 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -7,7 +7,7 @@ {% block body %}
-

LLM Server

+

LLM Providers

{% if flashes %}
From feb4a8451478f8125f7fc11a3a4426c53fd6428a Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:29:13 +0200 Subject: [PATCH 20/53] Rework Ollama model info API request & parsing --- common/lib/llm/clients/ollama_client.py | 141 ++++++------------ common/lib/llm/clients/openai_client.py | 2 +- .../templates/controlpanel/llm-server.html | 24 +-- 3 files changed, 57 insertions(+), 110 deletions(-) diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py index 3064c1173..7c05c94f7 100644 --- a/common/lib/llm/clients/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -5,8 +5,6 @@ helpers for capability parsing, display-name formatting, and building canonical llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. """ - -import re import requests from common.lib.llm.llm_client import LLMProviderClient @@ -19,6 +17,33 @@ class OllamaClient(LLMProviderClient): _models_info_key = "models" _model_id_key = "model" + def list_models(self) -> list[dict]: + """ + List all models available. + + For Ollama, get some additional model info via an extra API request. + + :return list[dict]: List of models available.: + """ + models = super().list_models() + result = [] + for model in models: + try: + model_info = self._session.post( + f"{self.base_url}/api/show", + json={"model": model[self._model_id_key]}, + headers=self._headers, + timeout=self.timeout, + ).json() + result.append({**model, "model_info": model_info["model_info"]}) + except (requests.exceptions.HTTPError, KeyError) as e: + self.log.warning( + f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") + + return result + + + def parse_supported_media_types(self, meta: dict) -> list[str]: """Derive the media types a model supports from its Ollama metadata. 
@@ -71,97 +96,29 @@ def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - :param dict meta: Model metadata - :returns str: Human-readable display name string. + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. """ - model_info = meta.get("model_info", {}) if meta else {} - model_id = self.get_global_model_id(meta) - details = meta.get("details", {}) if meta else {} - - basename = None - for key in ("general.basename", "general.base_model.0.name"): - val = model_info.get(key) - if val: - basename = str(val).strip() - break - if not basename: - basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id - - def _parse_param_count(val): - if val is None: - return None - if isinstance(val, int): - return val - if isinstance(val, float): - return int(val) - s = str(val).strip().replace(",", "") - if not s: - return None - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) - if m: - num = float(m.group(1)) - suf = m.group(2).upper() - return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) - try: - return int(float(s)) - except Exception: - return None - - def _humanize(n): - if n is None: - return None - n = int(n) - if n >= 1_000_000_000: - x = n / 1_000_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith(".0"): - s = s[:-2] - return f"{s}B" - if n >= 1_000_000: - x = n / 1_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith(".0"): - s = s[:-2] - return f"{s}M" - return f"{n:,}" - - param_candidate = None - for key in ("parameter_size", "parameter_count"): - if key in details: - param_candidate = details.get(key) - break - if param_candidate is None: - param_candidate = model_info.get("general.parameter_count") - human = _humanize(_parse_param_count(param_candidate)) - - size_label = 
model_info.get("general.size_label") - size_label_norm = str(size_label).strip() if size_label else None - - tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None - - if tag: - tl = tag.lower() - if tl in ("latest", "stable", "current"): - suffix = f"{tag} · {human}" if human else tag - else: - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", tag) - if m: - tag_size = f"{m.group(1)}{m.group(2).upper()}" - if size_label_norm and size_label_norm.upper() == tag_size.upper(): - suffix = size_label_norm - else: - suffix = tag_size - else: - suffix = f"{tag} · {human}" if human else tag - else: - if size_label_norm: - suffix = size_label_norm - elif human: - suffix = human - else: - return model_id - - return f"{basename} ({suffix})" + model_name = self.get_model_id(meta) + + extra_bits = [] + if meta.get("model_info"): + if meta["model_info"].get("general.basename"): + model_name = meta["model_info"]["general.basename"] + + if meta["model_info"].get("general.finetune"): + extra_bits.append(meta["model_info"]["general.finetune"]) + + if meta["model_info"].get("general.size_label"): + extra_bits.append(meta["model_info"]["general.size_label"]) + + elif meta.get("details") and meta["details"].get("parameter_size"): + extra_bits.append(f"{meta['details']['parameter_size']} parameters") + + model_name += f" ({', '.join(extra_bits)})" + + return model_name def get_model_card_url(self, meta: dict) -> str: """ diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py index fa04446c0..f8701dd7c 100644 --- a/common/lib/llm/clients/openai_client.py +++ b/common/lib/llm/clients/openai_client.py @@ -44,7 +44,7 @@ def format_display_name(self, meta: dict) -> str: :param meta: ``/api/show`` response dict, or ``None``. :returns: Human-readable display name string. 
""" - model_name = self.get_global_model_id(meta) + model_name = self.get_model_id(meta) if meta.get("display_name"): model_name = meta["display_name"] diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 4b4285dd8..c31b92e93 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -33,12 +33,10 @@

LLM Providers

{% else %} {% for provider in providers %} - {{ provider.name }} + {{ provider.type }} {{ provider.name }} {% if provider.status == "online" %} Online - {% elif provider.status == "not configured" %} - Not configured {% else %} {{ provider.status }} {% endif %} @@ -64,23 +62,17 @@

- + Name Provider/model - Display name Capabilities Status - Actions {% if available_models %} {% for model_id, model in available_models.items() %} - - {{ model.provider_type }}/{{ model.provider|hostname }} - {{ model.local_id }} - {% if model.model_card %} {{ model.name }} @@ -89,14 +81,11 @@

{% endif %} - {{ model.supported_media_types | join(", ") }} + {{ model.provider_type }}/{{ model.provider|hostname }}
+ {{ model.local_id }} - {% if model_id in enabled_models %} - Enabled - {% else %} - Disabled - {% endif %} + {{ model.supported_media_types | join(", ") }} {% if model_id in enabled_models %} @@ -128,7 +117,8 @@

{% if providers %} - No models found. Use the Refresh button to fetch available models, or pull a new model below. + No models found. Use the Refresh button to fetch available models, or + install a new model below with compatible providers. {% else %} No LLM providers configured. {% endif %} From 9084cc5857a6eb4489fb0cb9390b14ce0be0ebb4 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:33:37 +0200 Subject: [PATCH 21/53] Fix Ollama capability detection --- common/lib/llm/clients/ollama_client.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py index 7c05c94f7..e21297448 100644 --- a/common/lib/llm/clients/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -35,7 +35,7 @@ def list_models(self) -> list[dict]: headers=self._headers, timeout=self.timeout, ).json() - result.append({**model, "model_info": model_info["model_info"]}) + result.append({**model, "metadata": model_info}) except (requests.exceptions.HTTPError, KeyError) as e: self.log.warning( f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") @@ -61,10 +61,10 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: Returns ``[]`` when ``meta`` is ``None`` (unknown — callers should include the model, not block it). 
""" - if meta is None: + if meta is None or not meta.get("metadata"): return [] - capabilities = meta.get("capabilities", []) + capabilities = meta["metadata"].get("capabilities", []) media_types: list[str] = [] _cap_map = { @@ -103,15 +103,16 @@ def format_display_name(self, meta: dict) -> str: model_name = self.get_model_id(meta) extra_bits = [] - if meta.get("model_info"): - if meta["model_info"].get("general.basename"): - model_name = meta["model_info"]["general.basename"] + if meta.get("metadata") and meta["metadata"].get("model_info"): + more_meta = meta["metadata"]["model_info"] + if more_meta.get("general.basename"): + model_name = more_meta["general.basename"] - if meta["model_info"].get("general.finetune"): - extra_bits.append(meta["model_info"]["general.finetune"]) + if more_meta.get("general.finetune"): + extra_bits.append(more_meta["general.finetune"]) - if meta["model_info"].get("general.size_label"): - extra_bits.append(meta["model_info"]["general.size_label"]) + if more_meta.get("general.size_label"): + extra_bits.append(more_meta["general.size_label"]) elif meta.get("details") and meta["details"].get("parameter_size"): extra_bits.append(f"{meta['details']['parameter_size']} parameters") From c645d47cdf7ecef4156965e51e763e2fb0618e06 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:35:25 +0200 Subject: [PATCH 22/53] Fix model card URL for external model APIs --- common/lib/llm/clients/thirdparty_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py index 8d2f48602..e1df93d45 100644 --- a/common/lib/llm/clients/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -55,3 +55,12 @@ def build_model_entry(self, meta: dict) -> dict: entry["provider"] = meta["provider"] return entry + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model 
metadata + :return str: Model card URL (empty string if unavailable) + """ + return meta["model_card"] if meta["model_card"] else "" From 5d466915c29e2bbabaa40a5bffa21c23161c0066 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:47:50 +0200 Subject: [PATCH 23/53] Add notice to page if currently updating --- webtool/templates/controlpanel/llm-server.html | 12 +++++++++++- webtool/views/views_llm.py | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index c31b92e93..af9ffe689 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -17,6 +17,10 @@

LLM Providers

{% endif %} +

+ You can add and configure LLM providers via the 'LLM providers' tab on the Settings page. +

+ {# Server status #}
@@ -27,7 +31,7 @@

LLM Providers

{% if not providers %} {% else %} @@ -56,6 +60,12 @@

+ {% if update_running %} +

+ Models are currently being refreshed or installed - reload the page to see up-to-date list. +

+ {% endif %} +
- No LLM providers configured. You can add and configure servers via the 'LLM' tab on the Settings page. + No LLM providers configured.
diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 863d7e9fe..8a70ad910 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -89,10 +89,15 @@ def llm_panel(): available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) + update_running = bool([ + job for job in g.queue.get_all_jobs("manage-llm") if not job.data["interval"] + ]) + return render_template( "controlpanel/llm-server.html", flashes=get_flashed_messages(), providers=providers, available_models=available_models, enabled_models=enabled_models, + update_running=update_running, ) From 3acc27bf31a98ab3f8b61b35ba5f756ad89661b3 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:48:00 +0200 Subject: [PATCH 24/53] Update setting category name --- common/lib/config_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 0078fa681..78cd1b7dc 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -763,5 +763,5 @@ "proxies": "Proxied HTTP requests", "image-visuals": "Image visualization", "extensions": "Extensions", - "llm": "LLM Server Settings" + "llm": "LLM Providers" } From a281a82f728988d69887212774b43168dc52f824 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:55:40 +0200 Subject: [PATCH 25/53] Fix PromptCompass --- common/lib/llm/clients/litellm_client.py | 2 +- processors/machine_learning/prompt_compass.py | 36 +++++++------------ 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py index 0da9dc682..cf65497ff 100644 --- a/common/lib/llm/clients/litellm_client.py +++ b/common/lib/llm/clients/litellm_client.py @@ -35,7 +35,7 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: media_types.add("image") if 
meta["model_info"].get("supports_audio_input"): - media_types.add("sound") + media_types.add("audio") # no way to tell if model supports embeddings input as far as I can see... diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 076bd916f..033166cfd 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -3,7 +3,7 @@ """ from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm import LLMAdapter +from common.lib.llm.adapter import LLMAdapter from common.lib.exceptions import ( QueryParametersException, @@ -63,25 +63,6 @@ def get_prompt_library(config): return prompt_library - @staticmethod - def get_available_models(config): - """ - Get available model providers - - Combine the list defined by the LLMAdapter with known local models. - - :param config: Configuration reader - :return dict: Models and metadata - """ - # get cached local models - models = config.get("llm.available_models", {}) - models = {} if models == [] else models - models.update({k: v for k, v in LLMAdapter.get_models(config).items() if k not in ("none", "custom")}) - - models = {k: v for k, v in models.items() if "model_card" in v} - - return models - @staticmethod def is_compatible_with(module=None, config=None): """ @@ -108,15 +89,22 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = cls.get_available_models(config) + available_models = config.get("llm.available_models", []) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} options = { "model": { "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party 
models require an API key to run.", - "options": {("local/" if v["provider"] == "local" else f"{v['provider']}/") + k: v["name"] for k, v in available_models.items()}, - "default": sorted(list(available_models.keys()), key=lambda k: k.startswith("local"))[-1] + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, + "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] }, } @@ -136,7 +124,7 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model!^=local" + "requires": "model^=api" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, From 4813c07be5437de2014e026c0f5228a14c0a66c0 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:56:41 +0200 Subject: [PATCH 26/53] No longer need LLMAdapter in PromptCompass... 
--- processors/machine_learning/prompt_compass.py | 1 - 1 file changed, 1 deletion(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 033166cfd..e43e5987c 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -3,7 +3,6 @@ """ from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm.adapter import LLMAdapter from common.lib.exceptions import ( QueryParametersException, From 3f3c8d5a3a7f68388919dc4096b81ab4cc0d88a5 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 19:50:55 +0200 Subject: [PATCH 27/53] Fix LLMPrompter queue IDs --- processors/machine_learning/llm_prompter.py | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 9f174f148..7026aa6c3 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -55,13 +55,19 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: local_queue = "local_models" if not dataset: return local_queue + + model = dataset.parameters.get("model") + if model.startswith("api"): + # API-based models have their own queue - no local resources being + # used so can be concurrent + return f"llm-api-{dataset.key}" else: - if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]: - # Hosted models also go in the local queue since they use the same shared LLM server - return local_queue - - # Queue per model/API type - return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}" + # use the model URL as the queue ID (extracted from the model + # global ID) + # this is not fool-proof, but does mean not more than one dataset + # runs per API server - in the scenario of these running locally, + # it 
means things do not run concurrently (which is good) + return f"llm-local-{dataset.parameters.get('model').split('-')[1]}" @classmethod def get_options(cls, parent_dataset=None, config=None) -> dict: @@ -311,6 +317,7 @@ def is_compatible_with(cls, module=None, config=None): # Text-based datasets if module.get_extension() in ["csv", "ndjson"]: return True + # Media datasets (zip archives with images, video, or audio) if module.get_extension() == "zip" and module.get_media_type() in ("image", "video", "audio"): return True @@ -345,9 +352,8 @@ def process(self): # Set value for batch length in prompts batches = max(1, min(self.parameters.get("batches", 1), self.source_dataset.num_rows)) - use_batches = batches > 1 - if media_columns or is_media_archive: # no batching for media files - use_batches = False + use_batches = batches > 1 and not (media_columns or is_media_archive) # no batching for media files + if not use_batches: self.dataset.delete_parameter("batches") From 098a7197b47b8b057300d8c5385e5055a2bb1e05 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Wed, 27 May 2026 13:00:01 +0200 Subject: [PATCH 28/53] Enabled models updating --- backend/workers/llm_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 398b835ca..14615344f 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -76,6 +76,8 @@ def work(self): self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") if available_models is not None: + enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) self.config.set("llm.available_models", available_models) + self.config.set("llm.enabled_models", list(enabled_and_available) self.job.finish() From 5d9e5051c948c81cd284eb4ee0c13afb9f786ca0 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 28 May 2026 18:18:53 +0200 Subject: [PATCH 29/53] typoes --- 
backend/workers/llm_manager.py | 2 +- processors/machine_learning/prompt_compass.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 14615344f..38d9405e0 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -78,6 +78,6 @@ def work(self): if available_models is not None: enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) self.config.set("llm.available_models", available_models) - self.config.set("llm.enabled_models", list(enabled_and_available) + self.config.set("llm.enabled_models", list(enabled_and_available)) self.job.finish() diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index e43e5987c..46386790e 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -71,9 +71,7 @@ def is_compatible_with(module=None, config=None): :param ConfigManager|None config: Configuration reader (context-aware) :return bool: """ - models = PromptCompassRunner.get_available_models(config) - return (models - and module.is_top_dataset() + return (module.is_top_dataset() and module.get_extension() in ("csv", "ndjson")) @classmethod From 4b94c41aa8babb38c10826ec2413df5138acdd9b Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Mon, 1 Jun 2026 17:41:01 +0200 Subject: [PATCH 30/53] Rework provider config to store as dict One little hack to prevent a lot of others --- backend/workers/llm_manager.py | 4 +-- common/lib/config_definition.py | 4 ++- common/lib/llm/adapter.py | 12 ++++---- common/lib/llm/clients/thirdparty_client.py | 2 +- common/lib/llm/llm_client.py | 10 +++---- common/lib/user_input.py | 9 ++++-- processors/machine_learning/llm_prompter.py | 27 ++++++++++++----- processors/machine_learning/prompt_compass.py | 29 +++++++++---------- processors/metrics/rank_attribute.py | 8 +++-- 
.../components/datasource-option.html | 9 ++++-- .../components/processor-option.html | 11 ++++++- .../templates/controlpanel/llm-server.html | 10 ++++--- webtool/views/views_llm.py | 4 +-- 13 files changed, 86 insertions(+), 53 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 38d9405e0..1b3c62533 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -39,8 +39,8 @@ def work(self): model_name = self.job.data["remote_id"] available_models = None - for provider_config in self.config.get("llm.providers", []): - if provider and provider != provider_config["url"]: + for provider_id, provider_config in self.config.get("llm.providers", {}).items(): + if provider and provider != provider_id: continue try: diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 78cd1b7dc..0c06d0203 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -9,6 +9,7 @@ """ from common.lib.user_input import UserInput +import re config_definition = { "datasources.intro": { @@ -594,6 +595,7 @@ ], "global": True, "help": "LLM providers", + "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""), "options": { "name": { "type": UserInput.OPTION_TEXT, @@ -617,7 +619,7 @@ "type": UserInput.OPTION_TEXT, "default": "", "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema (e.g. 
'https://').", }, "auth_header": { "type": UserInput.OPTION_TEXT, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 9fe80eb49..8e4c7bc26 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -36,7 +36,7 @@ def __init__( :param max_tokens: Max tokens to generate :param client_kwargs: Optional parameters for the LLM adapter class """ - known_providers = {p['url']: p for p in config.get("llm.providers")} + known_providers = config.get("llm.providers", {}) self.model = model self.provider = known_providers.get(model['provider']) @@ -216,7 +216,7 @@ def _format_media_block( :param media_category: "image", "video", or "audio" :returns: Provider-formatted content block """ - if self.provider == "anthropic": + if self.provider["type"] == "anthropic": if media_category == "image": if url: return {"type": "image", "source": {"type": "url", "url": url}} @@ -232,13 +232,13 @@ def _format_media_block( return {"type": "document", "source": { "type": "base64", "media_type": mime_type, "data": b64_data }} - elif self.provider == "google": + elif self.provider["type"] == "google": if url: return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" return {"type": "image_url", "image_url": {"url": data_uri}} - elif self.provider == "ollama": + elif self.provider["type"] == "ollama": if media_category != "image": raise ValueError(f"Ollama provider only supports image media, got category '{media_category}'") if url: @@ -258,7 +258,7 @@ def _format_media_block( return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" - if media_category == "audio" and self.provider == "openai": + if media_category == "audio" and self.provider["type"] == "openai": return {"type": "input_audio", "input_audio": { "data": b64_data, "format": mime_type.split("/")[-1] }} @@ -274,7 +274,7 @@ def set_structure(self, json_schema): json.dumps(json_schema) # To 
validate / raise an error # LM Studio needs some more guidance - if self.provider == "lmstudio": + if self.provider["type"] == "lmstudio": json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}} self.llm = self.llm.bind(response_format=json_schema) else: diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py index e1df93d45..2a2db4dc3 100644 --- a/common/lib/llm/clients/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -52,7 +52,7 @@ def build_model_entry(self, meta: dict) -> dict: :returns: Dict ready to store under ``llm.available_models[model_id]``. """ entry = super().build_model_entry(meta) - entry["provider"] = meta["provider"] + entry["provider_key"] = meta["provider"] return entry diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index b59d38ab0..b31cb035a 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -14,7 +14,7 @@ class LLMProviderClient: _headers = {} - _meta = {} + provider_config = {} @staticmethod def get_client(config, provider_config: dict) -> "LLMProviderClient": @@ -49,8 +49,7 @@ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) - :param Logger log: 4CAT log handler """ self.config = config - - self._meta = provider_config + self.provider_config = provider_config self.timeout = timeout self.auth_type = provider_config.get("auth_header") @@ -126,8 +125,7 @@ def build_model_entry(self, meta: dict) -> dict: "local_id": self.get_model_id(meta), "name": self.format_display_name(meta), "model_card": self.get_model_card_url(meta), - "provider_type": self._meta["type"], - "provider": self._meta["url"], + "provider": self.provider_config["_id"], "supported_media_types": self.parse_supported_media_types(meta), "metadata": meta, } @@ -193,4 +191,4 @@ def get_global_model_id(self, meta: dict) -> str: :param dict meta: Model metadata :return str: Model ID """ - return 
"-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta))) \ No newline at end of file + return "-".join((self.provider_config["type"], self.provider_config["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 16a583d74..ec0798b3e 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -1,3 +1,4 @@ +from attr.validators import is_callable from dateutil.parser import parse as parse_datetime from common.lib.exceptions import QueryParametersException from werkzeug.datastructures import ImmutableMultiDict @@ -203,7 +204,6 @@ def parse_all(options, input, silently_correct=True): if input_index not in input_items: input_items[input_index] = {} - print(key, value) input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) # discard items that are only default values @@ -217,7 +217,12 @@ def parse_all(options, input, silently_correct=True): if not only_default: parsed_input[option].append(item) - print(parsed_input[option]) + # may define a mapper to make this a dict + if settings.get("dict_key"): + if callable(settings["dict_key"]): + parsed_input[option] = {settings["dict_key"](value): {**value, "_id": settings["dict_key"](value)} for value in parsed_input[option]} + else: + parsed_input[option] = {value[settings["dict_key"]]: {**value, "_id": value[settings["dict_key"]]} for value in parsed_input[option]} elif option not in input: # not provided? 
use default diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 7026aa6c3..3d6633919 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -70,14 +70,27 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: return f"llm-local-{dataset.parameters.get('model').split('-')[1]}" @classmethod - def get_options(cls, parent_dataset=None, config=None) -> dict: - # Check if 4CAT wide LLM server is available + def get_model_library(cls, config): available_models = config.get("llm.available_models", []) enabled_model_ids = config.get("llm.enabled_models", []) + providers = config.get("llm.providers", {}) if not config.get("llm.access"): enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] - enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} + models_option = {} + for key, value in {k: v for k, v in available_models.items() if k in enabled_model_ids}.items(): + provider = providers[value["provider"]] + if provider["name"] not in models_option: + models_option[provider["name"]] = {} + + models_option[provider["name"]][key] = value["name"] + + return models_option + + @classmethod + def get_options(cls, parent_dataset=None, config=None) -> dict: + # Check if 4CAT wide LLM server is available + models = cls.get_model_library(config) # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False @@ -98,9 +111,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": { - model_id: model["name"] for model_id, model in enabled_models.items() - }, + "options": models, "default": "none", "tooltip": "Select from the predefined model list or insert manually", }, @@ -110,7 +121,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create 
an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_model^=api", + "requires": "model^=api", "sensitive": True, } } @@ -267,7 +278,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: options.update({ "ethics_warning3": { "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", + "requires": "model^=api-", "help": "When using LLMs through commercial parties, always consider anonymising your data and " "whether local open-source LLMs are also an option.", }, diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 46386790e..7e4343bf7 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -13,7 +13,7 @@ import json -class PromptCompassRunner(ProcessorPreset): +class PromptCompassRunner(): """ Run processor pipeline to feed prompts to LLM Prompter """ @@ -98,20 +98,19 @@ def get_options(cls, parent_dataset=None, config=None): "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party models require an API key to run.", - "options": { - model_id: model["name"] for model_id, model in enabled_models.items() - }, + "options": LLMPrompter.get_model_library(config), "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] }, } - for model, metadata in available_models.items(): - model_key = metadata["provider"] + "/" + model - options[f"{model_key}-info"] = { - "type": UserInput.OPTION_INFO, - "help": f"Read the [model card]({metadata['model_card']}) for {model}.", - "requires": f"model=={model_key}" - } + for model, metadata in enabled_models.items(): + if metadata.get("model_card"): + model_key = metadata["provider"] + "/" + model + options[f"{model_key}-info"] = { + "type": UserInput.OPTION_INFO, + "help": f"Read the [model card]({metadata['model_card']}) for {model}.", + 
"requires": f"model=={model_key}" + } options.update({ "api_key": { @@ -121,20 +120,20 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model^=api" + "requires": "model^=api-" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, "help": "Hide reasoning", "default": False, "tooltip": "Some models include reasoning in their output, between tags. This option " - "removes this tag and its contents from the output.", - "requires": "model^=local/deepseek" + "removes this tag and its contents from the output, if present.", }, "temperature": { "type": UserInput.OPTION_TEXT, "help": "Temperature", - "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange results", + "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange " + "results. Does not have an effect on all models.", "coerce_type": float, "min": 0.0, "max": 1.0, diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 3c7c03286..8e18bb2bf 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -82,9 +82,11 @@ def get_options(cls, parent_dataset=None, config=None): "type": UserInput.OPTION_CHOICE, "options": { "none": "Use column value", - "urls": "URLs", - "hostnames": "Domain names", - "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", + "URL-related": { + "urls": "URLs", + "hostnames": "Domain names", + "level2-hostnames": "Second-level domain names (e.g. 
m.youtube.com -> youtube.com)", + }, "hashtags": "Hashtags (words starting with #)", "emoji": "Emoji (each used emoji in the column is counted individually)", "occurrence": "Values (the number of comma-separated values in the given field)" diff --git a/webtool/templates/components/datasource-option.html b/webtool/templates/components/datasource-option.html index 867a339c8..cd8694fec 100644 --- a/webtool/templates/components/datasource-option.html +++ b/webtool/templates/components/datasource-option.html @@ -12,8 +12,13 @@

{{ settings.help }}

    {# always include an empty item #} {% set empty_item = settings.options|propmap("default") %} - {% do settings.default.append(empty_item) %} - {% for item in settings.default %} + {% if settings['default'] is mapping %} + {% set current_value = settings.default.values()|list %} + {% else %} + {% set current_value = settings.default %} + {% endif %} + {% do current_value.append(empty_item) %} + {% for item in current_value %} {% set outerloop = loop %} {% set last_index = outerloop.index %}
  1. diff --git a/webtool/templates/components/processor-option.html b/webtool/templates/components/processor-option.html index 1fbdd728e..dbe92ab27 100644 --- a/webtool/templates/components/processor-option.html +++ b/webtool/templates/components/processor-option.html @@ -22,7 +22,16 @@ {% elif option_settings.type == "choice" %} {% elif option_settings.type in ("multi", "annotations") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index af9ffe689..691df0d42 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -35,7 +35,7 @@

    LLM Providers

    {% else %} - {% for provider in providers %} + {% for provider_id, provider in providers.items() %}
{{ provider.type }} {{ provider.name }} @@ -91,7 +91,7 @@

{% endif %}

- {{ model.provider_type }}/{{ model.provider|hostname }}
+ {{ providers[model.provider]['type'] }}/{{ providers[model.provider].url|hostname }}
{{ model.local_id }}
@@ -111,7 +111,7 @@

{% endif %} - {% if model.provider_type == "ollama" %} + {% if providers[model.provider].type == "ollama" %}
@@ -140,7 +140,9 @@

{# Pull a new model, if an ollama server is configured #} - {% if providers|selectattr("type", "equalto", "ollama")|list %} + {% set can_add_models = False %} + {% for provider_id, provider in providers.items() %}{% if provider.type == "ollama" %}{% set can_add_models = True %}{% endif %}{% endfor %} + {% if can_add_models %}

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 8a70ad910..2c02e603c 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -76,7 +76,7 @@ def llm_panel(): # --- GET: render panel --- - for i, provider in enumerate(providers): + for provider_id, provider in providers.items(): client = LLMProviderClient.get_client(g.config, provider) if provider_status := client.get_status(): @@ -84,7 +84,7 @@ def llm_panel(): else: server_status = "unreachable" - providers[i]["status"] = server_status + providers[provider_id]["status"] = server_status available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) From fe5e8f10b0fd7447f11fa9fade0393fa902797d5 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Mon, 1 Jun 2026 18:05:00 +0200 Subject: [PATCH 31/53] Further cleanup --- common/lib/user_input.py | 8 +++++--- processors/machine_learning/llm_prompter.py | 14 ++++++-------- processors/machine_learning/prompt_compass.py | 14 ++------------ 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/common/lib/user_input.py b/common/lib/user_input.py index ec0798b3e..df0d87c4a 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -1,10 +1,10 @@ -from attr.validators import is_callable from dateutil.parser import parse as parse_datetime from common.lib.exceptions import QueryParametersException from werkzeug.datastructures import ImmutableMultiDict import json import re +from itertools import chain class RequirementsNotMetException(Exception): """ @@ -435,9 +435,11 @@ def parse_value(settings, choice, other_input=None, silently_correct=True): # select box # one out of multiple options # return option if valid, or default - if choice not in settings.get("options"): + options = settings.get("options", []) + match_options = chain(*[list(o.keys()) for o 
in options.values()]) if type(options) is dict else options + if choice not in match_options: if not silently_correct: - raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}") + raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(match_options)}.") else: return settings.get("default", "") else: diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 3d6633919..b5cb94e78 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -379,15 +379,13 @@ def process(self): return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") model = available_models[chosen_model_id] + provider = self.config.get("llm.providers").get(model["provider"]) - if model["provider_type"] == "api" and not api_key: - return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") - - available_providers = {p["url"]: p for p in self.config.get("llm.providers")} - if model["provider"] not in available_providers: - return self.dataset.finish_with_error(f"Model provider {model['provider']} unknown") + if not provider: + return self.dataset.finish_with_error(f"Model provider for {chosen_model_id} not currently available.") - provider = available_providers[model["provider"]] + if provider["type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -434,7 +432,7 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" - self.dataset.log(f"Using LLM provider '{model['provider_type'] if provider['url'] else provider['provider']}' with model '{model['local_id']}'{base_url_str}") + 
self.dataset.log(f"Using LLM provider '{provider['_id']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( config=self.config, diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 7e4343bf7..56fec81b2 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -211,24 +211,14 @@ def get_processor_pipeline(self): if short_name: self.dataset.update_label(f"PromptCompass ({short_name})") - chosen_model = "/".join(self.parameters.get("model").split("/")[1:]) - models = self.get_available_models(self.config) - if chosen_model not in models: + if self.parameters.get("model") not in config.get("llm.enabled_models", []): return self.dataset.finish_with_error(f"Model {self.parameters['model']} is not available, halting processor.") - model = models[chosen_model] - pipeline = [ { "type": "llm-prompter", "parameters": { - "api_or_local": "local" if model["provider"] == "local" else "api", - "api_model": chosen_model if model["provider"] != "local" else "", - "api_key": self.parameters.get("api_key"), - "api_custom_model_provider": "", - "local_provider": self.config.get("llm.provider_type"), - "local_base_url": self.config.get("llm.server"), - "ollama_model": chosen_model if model["provider"] == "local" else "", + "model": self.parameters.get("model"), "prompt": self.parameters[self.parameters["task"]], "structured_output": False, "temperature": self.parameters["temperature"], From bd1fd4594d6a428cac2a6c046f2382c277e69222 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:13:08 +0200 Subject: [PATCH 32/53] Fix INPUT_CHOICE checking if categorised --- common/lib/user_input.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/common/lib/user_input.py b/common/lib/user_input.py index df0d87c4a..a6fe10458 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -72,8 +72,6 @@ def 
parse_all(options, input, silently_correct=True): if type(input) is not dict and type(input) is not ImmutableMultiDict: raise TypeError("input must be a dictionary or ImmutableMultiDict") - print(input) - if type(input) is ImmutableMultiDict: # we are not using to_dict, because that messes up multi-selects input = {key: input.getlist(key) for key in input} @@ -436,7 +434,12 @@ def parse_value(settings, choice, other_input=None, silently_correct=True): # one out of multiple options # return option if valid, or default options = settings.get("options", []) - match_options = chain(*[list(o.keys()) for o in options.values()]) if type(options) is dict else options + + # if we have a categorised set of options, look deeper to get + # valid option values + is_categorised = all([type(o) is dict for o in options.values()]) + match_options = chain(*[list(o.keys()) for o in options.values()]) if is_categorised else options + if choice not in match_options: if not silently_correct: raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(match_options)}.") From a6a8f11189f2ef447f5ab08040a31114f200ae26 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:37:18 +0200 Subject: [PATCH 33/53] Migrate script --- helper-scripts/migrate/migrate-1.54-1.55.py | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 helper-scripts/migrate/migrate-1.54-1.55.py diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py new file mode 100644 index 000000000..7a5074774 --- /dev/null +++ b/helper-scripts/migrate/migrate-1.54-1.55.py @@ -0,0 +1,80 @@ +import json +import sys +import os + +from pathlib import Path + +sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../..")) +from common.lib.database import Database +from common.lib.logger import Logger + +import configparser # noqa: E402 + +log = Logger(output=True) +ini = configparser.ConfigParser() 
+ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini")) +db_config = ini["DATABASE"] + +db = Database( + logger=log, + dbname=db_config["db_name"], + user=db_config["db_user"], + password=db_config["db_password"], + host=db_config["db_host"], + port=db_config["db_port"], + appname="4cat-migrate", +) + +# the separate LLM server settings were consolidated into one overarching 'llm.providers' setting +print(" Checking if llm.providers setting exists...") +has_setting = db.fetchone( + "SELECT COUNT(*) AS num FROM settings WHERE name = 'llm.providers'" +) + +if has_setting["num"] > 0: + print(" ...exists, deleting old settings without overwriting") +else: + print(" ...does not exist, filling with currently configured proviers") + provider_type = db.fetchone("SELECT value FROM settings WHERE name = 'llm.provider_type'") + providers = {} + if not provider_type: + print(" ...no provider currently configured") + else: + url = db.fetchone("SELECT value FROM settings WHERE name = 'llm.server'") + host = url.split("/")[2] if "://" in url else "localhost" + auth_header = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_type'") + auth_key = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_key'") + provider_name = db.fetchone("SELECT value FROM settings WHERE name = 'llm.host_name'") + provider_id = f"{provider_type}-{host}" + + # vLLM and LM Studio are both openai-like + provider_type = {"ollama": "ollama"}.get(provider_type, "openai-like") + providers[provider_id] = { + "name": provider_name, + "type": provider_type, + "url": url, + "auth_header": auth_header, + "auth_key": auth_key, + "_id": provider_id + } + + # add API models, always present + providers["thirdparty-models"] = { + "name": "Third-party models", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "", + "_id": "thirdparty-models" + } + + db.insert("settings", {"name": "llm.providers", "value": json.dumps(providers)}) + print(f" ...added 
{len(providers)} providers") + +print(" Cleaning up old settings") +db.execute("DELETE FROM settings WHERE name LIKE 'llm.%' AND name NOT IN ('llm.providers', 'llm.available_models', 'llm.access')") + +print(" Removing all known models (will be re-indexed on 4CAT restart)") +db.upsert("settings", {"name": "llm.available_models", "value": "{}"}) + +print(" - done!") From 1863e7288b665b214c75990dad54daa6ced38ac9 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:39:04 +0200 Subject: [PATCH 34/53] PromptCompass is BACK --- processors/machine_learning/prompt_compass.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 56fec81b2..3fbc3adfa 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -13,7 +13,8 @@ import json -class PromptCompassRunner(): + +class PromptCompassRunner(ProcessorPreset): """ Run processor pipeline to feed prompts to LLM Prompter """ @@ -25,8 +26,8 @@ class PromptCompassRunner(): extension = "ndjson" references = [ - "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", - "See the processor options for references to the sources of each prompt in the library." + "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", + "See the processor options for references to the sources of each prompt in the library." 
] @staticmethod @@ -39,7 +40,7 @@ def get_prompt_library(config): prompt_library_file = config.get("PATH_ROOT").joinpath("common/assets/prompt_library.json") if not prompt_library_file.exists(): return [] - + with prompt_library_file.open(encoding="utf-8") as infile: prompt_library = json.load(infile) @@ -149,7 +150,7 @@ def get_options(cls, parent_dataset=None, config=None): }) for i, task in enumerate(prompt_library): - task_key = f"task-{i+1}" + task_key = f"task-{i + 1}" options[task_key] = { "type": UserInput.OPTION_TEXT_LARGE, "requires": f"task=={task_key}", @@ -212,7 +213,8 @@ def get_processor_pipeline(self): self.dataset.update_label(f"PromptCompass ({short_name})") if self.parameters.get("model") not in config.get("llm.enabled_models", []): - return self.dataset.finish_with_error(f"Model {self.parameters['model']} is not available, halting processor.") + return self.dataset.finish_with_error( + f"Model {self.parameters['model']} is not available, halting processor.") pipeline = [ { @@ -234,7 +236,6 @@ def get_processor_pipeline(self): return pipeline - @staticmethod def validate_query(query, request, config): """ @@ -268,4 +269,4 @@ def map_item(item): :param item: :return: """ - return LLMPrompter.map_item(item) \ No newline at end of file + return LLMPrompter.map_item(item) From e7d0e42eb7306b0bd97de62bfa98584d2508798d Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:41:17 +0200 Subject: [PATCH 35/53] Fix erroneous config reference --- processors/machine_learning/prompt_compass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 3fbc3adfa..8d1d75359 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -35,6 +35,7 @@ def get_prompt_library(config): """ Get prompt library from file + :param config: Config reader :return list: List of prompts and metadata 
""" prompt_library_file = config.get("PATH_ROOT").joinpath("common/assets/prompt_library.json") @@ -212,7 +213,7 @@ def get_processor_pipeline(self): if short_name: self.dataset.update_label(f"PromptCompass ({short_name})") - if self.parameters.get("model") not in config.get("llm.enabled_models", []): + if self.parameters.get("model") not in self.config.get("llm.enabled_models", []): return self.dataset.finish_with_error( f"Model {self.parameters['model']} is not available, halting processor.") From 9eee696bf1c6b5de0317a509f385c9f8b8c09140 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:47:54 +0200 Subject: [PATCH 36/53] Guard against forbidden model selection --- processors/machine_learning/llm_prompter.py | 6 ++++++ processors/machine_learning/prompt_compass.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index b5cb94e78..088fa6e75 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -5,6 +5,8 @@ import re import time import json +from itertools import chain + import jsonschema import requests @@ -1105,6 +1107,10 @@ def validate_query(query, request, config): if is_external_api and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise QueryParametersException(f"The '{query['model']}' model is not currently available.") + # For media archive datasets, use_media won't be present in the query is_media_archive = "use_media" not in query diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 8d1d75359..00fad8cf2 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ 
-249,6 +249,10 @@ def validate_query(query, request, config): :param config: :return: """ + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise QueryParametersException(f"The '{query['model']}' model is not currently available.") + if not query["model"].startswith("local") and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") From b1da7b5a1b48964a28d9666300a0d83a7767dc3d Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:47:58 +0200 Subject: [PATCH 37/53] Bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 006ffd9f6..94144784e 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -1.54 +1.55 This file should not be modified. It is used by 4CAT to determine whether it needs to run migration scripts to e.g. update the database structure to a more From 3d725eacffce9a8ff688373beea5d6184fc400be Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:49:45 +0200 Subject: [PATCH 38/53] Fix default value for llm.providers --- common/lib/config_definition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 0c06d0203..88c015dac 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -584,15 +584,15 @@ }, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, - "default": [ - { + "default": { + "thirdparty-models": { "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", "type": "api", "url": "", "auth_header": "", "auth_key": "" } - ], + }, "global": True, "help": "LLM providers", "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""), From 41c701dec9f1513b0efbd2957786db3e6a99a0e7 
Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:50:54 +0200 Subject: [PATCH 39/53] sdadasdasdads --- processors/machine_learning/prompt_compass.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 00fad8cf2..17f7326d8 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -1,6 +1,9 @@ """ Use a prompt from a preset list """ +import json +from itertools import chain + from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput @@ -11,9 +14,6 @@ from processors.machine_learning.llm_prompter import LLMPrompter -import json - - class PromptCompassRunner(ProcessorPreset): """ Run processor pipeline to feed prompts to LLM Prompter From d46821acbfef87f2a955d6375306e946f0a6bb61 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:54:00 +0200 Subject: [PATCH 40/53] Update description of llm.access setting --- common/lib/config_definition.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 88c015dac..0635a4c97 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -653,9 +653,10 @@ }, "llm.access": { "type": UserInput.OPTION_TOGGLE, - "help": "LLM Access", + "help": "Local LLM Access", "default": False, - "tooltip": "Use tags or individual users to allow access to the LLM server (or set True in global for all).", + "tooltip": "If disabled, can only use LLMs from the 'Third-party models' provider. Can be configured per user " + "or tag.", }, # TODO: add setting to restrict models per user/group? 
From 1a147e8c0ee0cab89f89955599776152bf6b33ac Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:25:53 +0200 Subject: [PATCH 41/53] Add filename and line no to test error output --- tests/test_modules.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_modules.py b/tests/test_modules.py index 87ffc06ad..3b9a82579 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -1,3 +1,4 @@ +import traceback import pytest import time import json @@ -218,7 +219,8 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data processor_class.get_options(parent_dataset=mock_dataset, config=mock_basic_config) except Exception as e: # Log the failure and add it to the failures list - logger.error(f"Processor {processor_name} failed in get_options: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in get_options: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) # Check if processor Class has "options" attribute @@ -230,11 +232,13 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data try: processor_class(logger, job=mock_job, queue=mock_job_queue, manager=None, modules=fourcat_modules) except Exception as e: - logger.error(f"Processor {processor_name} failed in process(): {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in process(): {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) except Exception as e: - logger.error(f"Processor {processor_name} failed while setting up: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed while setting up: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) From 
fc91a4435bf6975cb507934f7732947f5cbb3833 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:27:32 +0200 Subject: [PATCH 42/53] Fix init issues in LLM processors --- processors/machine_learning/llm_prompter.py | 2 +- processors/machine_learning/prompt_compass.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 088fa6e75..5b4080e76 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -73,7 +73,7 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: @classmethod def get_model_library(cls, config): - available_models = config.get("llm.available_models", []) + available_models = config.get("llm.available_models", {}) enabled_model_ids = config.get("llm.enabled_models", []) providers = config.get("llm.providers", {}) if not config.get("llm.access"): diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 17f7326d8..4f4c7d0d8 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -88,7 +88,7 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = config.get("llm.available_models", []) + available_models = config.get("llm.available_models", {}) enabled_model_ids = config.get("llm.enabled_models", []) if not config.get("llm.access"): enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] @@ -101,7 +101,7 @@ def get_options(cls, parent_dataset=None, config=None): "help": "Model to use", "tooltip": "Third-party models require an API key to run.", "options": LLMPrompter.get_model_library(config), - "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] + "default": sorted(list(enabled_models.keys()), key=lambda k: not 
k.startswith("api"))[-1] if enabled_models else "" }, } From 8c4b34a1f94521208d1f6066671fde2df88e276a Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:38:23 +0200 Subject: [PATCH 43/53] Fix "add model" panel show/hide on LLM page --- webtool/templates/controlpanel/llm-server.html | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 691df0d42..04e29c80a 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -140,9 +140,7 @@

{# Pull a new model, if an ollama server is configured #} - {% set can_add_models = False %} - {% for provider_id, provider in providers.items() %}{% if provider.type == "ollama" %}{% set can_add_models = True %}{% endif %}{% endfor %} - {% if can_add_models %} + {% if providers.values()|selectattr("type", "equalto", "ollama")|list %}

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For From 46fee9fc8e4c6d854a79baac369f32108ff86823 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:41:37 +0200 Subject: [PATCH 44/53] the --- webtool/templates/controlpanel/llm-server.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 04e29c80a..3dc5b5034 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -144,7 +144,7 @@

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For - Ollama, model names can be found in + Ollama, model names can be found in the model library.

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install models for all LLM providers; if your provider is not listed below, it may not be able to add additional @@ -166,7 +166,7 @@

Install new LLMs

- +
From ff0f215334917498b522cb23bb19fbf8e677a0b8 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:59:37 +0200 Subject: [PATCH 45/53] Update Docker setup --- docker/README.md | 8 ++++---- docker/docker_setup.py | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/docker/README.md b/docker/README.md index a697b4921..6ad7e069b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -99,8 +99,8 @@ for you. You can skip to step 2 below. #### Manual configuration (or to verify/change settings) -1. Log in as admin and open **Control Panel → Settings**. -2. Confirm or set the following LLM fields: +1. Log in as admin and open **Control Panel → Settings → LLM Providers**. +2. Confirm that a provider with the following settings is present: | Setting | Value | |---|---| @@ -109,7 +109,7 @@ for you. You can skip to step 2 below. | LLM Access | enabled | 3. Save settings. -4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled). +4. Open **Control Panel → LLMs & Providers** (visible once *LLM Access* is enabled). 5. Use the **Refresh** button to load available models, then **Pull** a model (e.g. `llama3.2:3b`) to download it from the Ollama library. 6. Enable the models you want to make available to users. @@ -137,7 +137,7 @@ point 4CAT directly at that server: - **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. - **Remote server**: use the server's reachable URL and configure any required - API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. + API key in the *Authentication header* and *Authentication key* settings. 
In both cases, configure the LLM settings manually via **Control Panel → Settings** (see *Manual configuration* above), using the appropriate server URL instead of diff --git a/docker/docker_setup.py b/docker/docker_setup.py index aea641c12..6de76add0 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -28,7 +28,8 @@ def update_config_from_environment(CONFIG_FILE, config_parser): config_parser['SERVER']['public_port'] = os.environ['PUBLIC_PORT'] # Set API - config_parser['API']['api_host'] = os.environ['API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container + config_parser['API']['api_host'] = os.environ[ + 'API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container # Database configuration config_parser['DATABASE']['db_name'] = os.environ['POSTGRES_DB'] @@ -110,14 +111,16 @@ def _format_host(host: str) -> str: # Backend API config_parser.add_section('API') - config_parser['API']['api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network + config_parser['API'][ + 'api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network # File paths # Docker volumes are defined in docker-compose.yml; these rely on one shared volume `data` in the 4CAT root directory config_parser.add_section('PATHS') config_parser['PATHS']['path_images'] = 'data/images' # shared volume defined in docker-compose.yml config_parser['PATHS']['path_data'] = 'data/datasets' # shared volume defined in docker-compose.yml - config_parser['PATHS']['path_lockfile'] = 'backend' # docker-entrypoint.sh looks for pid file here (in event Docker shutdown was not clean) + config_parser['PATHS'][ + 'path_lockfile'] = 'backend' # docker-entrypoint.sh looks for 
pid file here (in event Docker shutdown was not clean) config_parser['PATHS']['path_sessions'] = 'data/sessions' # shared volume defined in docker-compose.yml config_parser['PATHS']['path_logs'] = 'data/logs/' # shared volume defined in docker-compose.yml @@ -137,7 +140,8 @@ def _format_host(host: str) -> str: config = ConfigManager() config.with_db(Database(logger=None, appname="docker-setup", - dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)) + dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, + host=config.DB_HOST, port=config.DB_PORT)) for path in [config.get('PATH_DATA'), config.get('PATH_IMAGES'), @@ -187,10 +191,12 @@ def _format_host(host: str) -> str: # Check to see if flask.server_name needs to be updated from common.config_manager import ConfigManager from common.lib.database import Database + config = ConfigManager() config.with_db(Database(logger=None, appname="docker-setup", - dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)) - + dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, + host=config.DB_HOST, port=config.DB_PORT)) + public_port = int(config_parser['SERVER']['public_port']) # Port handling here is independent from HTTPS; default is 80 default_port = 80 @@ -201,7 +207,8 @@ def _format_host(host: str) -> str: # Warn only when localhost/IP lacks a required non-default port if existing_port is None and _is_ip_or_localhost(host) and public_port != default_port: formatted_host = _format_host(host) - print(f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.") + print( + f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.") print( "You can do so by 
running the following command if you do not have access to the 4CAT frontend Control Panel:\n" f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\"" @@ -209,24 +216,29 @@ def _format_host(host: str) -> str: # If an Ollama container is available on the Docker network, configure 4CAT to use it. ollama_url = 'http://ollama:11434' + ollama_id = f"ollama-{ollama_url.split('/')[-1]}" try: import requests + try: resp = requests.get(f"{ollama_url}/api/tags", timeout=2) if resp.status_code == 200: - current_llm_server = config.get("llm.server") - if current_llm_server == ollama_url: + current_llm_providers = config.get("llm.providers") + if any([p["url"] == ollama_url for p in current_llm_providers]): print("Ollama server already configured in 4CAT settings.") - elif current_llm_server and current_llm_server != ollama_url: - # Previously configured LLM server is different; log a warning but do not overwrite user settings - print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.") else: # set basic LLM settings so the initial admin user does not need to # configure them manually for local development environments that # include the Ollama sidecar. 
- config.set('llm.provider_type', 'ollama') - config.set('llm.server', ollama_url) - config.set('llm.access', True) + current_llm_providers[ollama_id] = { + "name": "Ollama Server (4CAT, via Docker)", + "url": ollama_url, + "type": "ollama", + "auth_header": "", + "auth_key": "", + "_id": ollama_id + } + config.set('llm.providers', current_llm_providers) config.db.commit() print('Detected Ollama on Docker network; configured LLM settings to use it.') except requests.RequestException: From ac2ba3002ea17c4719ade6d0bc9199a4459abebf Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 16:37:40 +0200 Subject: [PATCH 46/53] Clean up docstrings, etc --- backend/workers/llm_manager.py | 3 ++ common/lib/llm/adapter.py | 34 +++++++------- common/lib/llm/clients/litellm_client.py | 16 ++----- common/lib/llm/clients/ollama_client.py | 49 ++++++++++----------- common/lib/llm/clients/openai_client.py | 22 +++++---- common/lib/llm/clients/thirdparty_client.py | 15 +++---- common/lib/llm/llm_client.py | 43 +++++++++--------- 7 files changed, 83 insertions(+), 99 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 1b3c62533..f6b2887ce 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -34,6 +34,9 @@ def ensure_job(cls, config=None): return {"remote_id": "manage-llm-refresh", "interval": 86400} def work(self): + """ + Manage LLM models and providers + """ task = self.job.details.get("task", "refresh") if self.job.details else "refresh" provider = self.job.details.get("provider", "") if self.job.details else None model_name = self.job.data["remote_id"] diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 8e4c7bc26..5a6a60380 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -20,7 +20,7 @@ class LLMAdapter: def __init__( self, config, - model, + model: dict, api_key: Optional[str] = None, temperature: float = 0.1, max_tokens: int = 1000, @@ -30,11 
+30,11 @@ def __init__( Instantiate an adapter to interface with an LLM model :param config: 4CAT config reader - :param model: Model metadata (as in `llm.available_models` 4CAT setting) - :param api_key: API key, if needed - :param temperature: Temperature hyperparameter - :param max_tokens: Max tokens to generate - :param client_kwargs: Optional parameters for the LLM adapter class + :param dict model: Model metadata (as in `llm.available_models` 4CAT setting) + :param str api_key: API key, if needed + :param float temperature: Temperature hyperparameter + :param int max_tokens: Max tokens to generate + :param dict client_kwargs: Optional parameters for the LLM adapter class """ known_providers = config.get("llm.providers", {}) @@ -50,6 +50,11 @@ def __init__( self.llm: BaseChatModel = self._load_llm() def _load_llm(self) -> BaseChatModel: + """ + Load appropriate langchain chat class + + :return BaseChatModel: Langchain chat model for interfacing with model + """ chat_params = { "model": self.model["local_id"], "api_key": SecretStr(self.api_key), @@ -265,6 +270,11 @@ def _format_media_block( return {"type": "image_url", "image_url": {"url": data_uri}} def set_structure(self, json_schema): + """ + Set desired response JSON schema + + :param json_schema: + """ if not json_schema: raise ValueError("json_schema is None") @@ -280,15 +290,3 @@ def set_structure(self, json_schema): else: self.llm = self.llm.with_structured_output(json_schema) self.structured_output = True - - @staticmethod - def get_models(config) -> dict: - """ - Returns a dict with LLM models supported by 4CAT, either through an API or as a local option. - Make sure to keep up-to-date! 
- - :returns dict, A dict with model IDs as keys and details as values - """ - available_models = config.get("llm.available_models", {}) - enabled_models = config.get("llm.enabled_models", {}) - return {k: v for k, v in available_models.items() if k in enabled_models} diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py index cf65497ff..9cf3c673f 100644 --- a/common/lib/llm/clients/litellm_client.py +++ b/common/lib/llm/clients/litellm_client.py @@ -1,14 +1,5 @@ """ Centralized HTTP client for communicating with a LiteLLM server. - -This class owns all direct HTTP calls to LiteLLM's REST API and provides shared -static helpers for capability parsing, display-name formatting, and building -canonical llm.available_models entries. It is a plain helper with no 4CAT -base-class dependency. - -This class is primarily intended for interfacing with LiteLLM, but since -LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface -with the OpenAI API as well. """ from common.lib.llm.llm_client import LLMProviderClient @@ -23,9 +14,9 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: """ Derive the media types a model supports from its LiteLLM metadata. - :param meta: ``model info`` response dict, or ``None``. + :param meta: `model info` response dict, or `None`. :returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` + Returns `[]` when `meta` is `None` """ if meta is None or not meta.get("model_info"): return [] @@ -45,8 +36,7 @@ def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. + :param meta: `/api/show` response dict, or `None`. :returns: Human-readable display name string. 
""" model_name = self.get_global_model_id(meta) diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py index e21297448..54add80ca 100644 --- a/common/lib/llm/clients/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -1,9 +1,5 @@ """ Centralized HTTP client for communicating with an Ollama server. - -This class owns all direct HTTP calls to Ollama's REST API and provides shared static -helpers for capability parsing, display-name formatting, and building canonical -llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. """ import requests @@ -47,19 +43,19 @@ def list_models(self) -> list[dict]: def parse_supported_media_types(self, meta: dict) -> list[str]: """Derive the media types a model supports from its Ollama metadata. - **Primary path**: reads ``meta["capabilities"]``: - - ``"completion"`` → ``"text"`` - - ``"vision"`` → ``"image"`` - - ``"embedding"`` → ``"embedding"`` + **Primary path**: reads `meta["capabilities"]`: + - `"completion"` → `"text"` + - `"vision"` → `"image"` + - `"embedding"` → `"embedding"` - **Fallback path** (used when capabilities are absent or only yield ``"text"``): - inspects GGUF ``model_info`` / ``details`` for vision signals and adds - ``"image"`` if any are found. + **Fallback path** (used when capabilities are absent or only yield `"text"`): + inspects GGUF `model_info` / `details` for vision signals and adds + `"image"` if any are found. - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` (unknown — callers - should include the model, not block it). + :param dict meta: `/api/show` response dict, or `None`. + :returns list[str]: Ordered list of supported media type strings. + Returns `[]` when `meta` is `None` (unknown — callers should + include the model, not block it). 
""" if meta is None or not meta.get("metadata"): return [] @@ -96,9 +92,8 @@ def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. + :param dict meta: `/api/show` response dict, or `None`. + :returns str: Human-readable display name string. """ model_name = self.get_model_id(meta) @@ -125,17 +120,18 @@ def get_model_card_url(self, meta: dict) -> str: """ Get a URL for a model card for a given model - :param meta: Model metadata + :param dict meta: Model metadata :return str: Model card URL (empty string if unavailable) """ return f"https://ollama.com/library/{meta['model']}" def pull_model(self, model_id: str, stream: bool = False) -> bool: - """Pull a model from the Ollama registry. + """ + Pull a model from the Ollama registry. - :param model_id: Model name (e.g. ``"llama3:8b"``). - :param stream: Whether to stream the response (default ``False``). - :returns: ``True`` on success, ``False`` on failure. + :param dict model_id: Model name (e.g. `"llama3:8b"`). + :param str stream: Whether to stream the response (default `False`). + :returns bool: `True` on success, `False` on failure. """ try: r = self._session.post( @@ -159,10 +155,11 @@ def pull_model(self, model_id: str, stream: bool = False) -> bool: return False def delete_model(self, model_id: str) -> bool: - """Delete a model from the Ollama server. + """ + Delete a model from the Ollama server. - :param model_id: Model name (e.g. ``"llama3:8b"``). - :returns: ``True`` on success, ``False`` on failure. + :param str model_id: Model name (e.g. `"llama3:8b"`). + :returns bool: `True` on success, `False` on failure. 
""" try: r = self._session.delete( diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py index f8701dd7c..5ef038ad1 100644 --- a/common/lib/llm/clients/openai_client.py +++ b/common/lib/llm/clients/openai_client.py @@ -1,15 +1,14 @@ """ Centralized HTTP client for communicating with an OpenAI compatible server. -This class owns all direct HTTP calls to an OpenAI style REST API and provides shared -static helpers for capability parsing, display-name formatting, and building -canonical llm.available_models entries. It is a plain helper with no 4CAT -base-class dependency. +This includes vLLM and LM Studio. And LiteLLM, technically, but LiteLLM has +some useful API endpoints exclusive to it that we can benefit from, so use +the dedicated class for tht instead. """ from common.lib.llm.llm_client import LLMProviderClient -class LMStudioClient(LLMProviderClient): +class OpenAICompatibleClient(LLMProviderClient): type = "openai-like" _models_info_path = "/api/v1/models" @@ -20,9 +19,9 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: """ Derive the media types a model supports from its LiteLLM metadata. - :param meta: ``model info`` response dict, or ``None``. - :returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` + :param dict meta: `model info` response dict, or `None`. + :returns list[str]: Ordered list of supported media type strings. + Returns `[]` when `meta` is `None` """ media_types = {"text"} # far as I can tell, text is always supported @@ -39,10 +38,9 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. += + :param dict meta: `/api/show` response dict, or `None`. 
+ :returns str: Human-readable display name string. """ model_name = self.get_model_id(meta) diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py index 2a2db4dc3..f324dc391 100644 --- a/common/lib/llm/clients/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -26,9 +26,9 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: """ Derive the media types a model supports from its LiteLLM metadata. - :param meta: ``model info`` response dict, or ``None``. + :param meta: `model info` response dict, or `None`. :returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` + Returns `[]` when `meta` is `None` """ return meta.get("supported_media_types", ["text"]) @@ -36,20 +36,17 @@ def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. + :param meta: `/api/show` response dict, or `None`. :returns: Human-readable display name string. """ return meta["name"] def build_model_entry(self, meta: dict) -> dict: """ - Build a canonical ``llm.available_models`` entry for a model. + Build a canonical `llm.available_models` entry for a model. - :param model_id: Raw model identifier. - :param display_name: Human-readable name (from ``format_display_name``). - :param meta: ``/api/show`` response dict, or ``None`` if unavailable. - :returns: Dict ready to store under ``llm.available_models[model_id]``. + :param meta: `/api/show` response dict, or `None` if unavailable. + :returns: Dict ready to store under `llm.available_models[model_id]`. 
""" entry = super().build_model_entry(meta) entry["provider_key"] = meta["provider"] diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index b31cb035a..7a6ac9779 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -26,15 +26,15 @@ def get_client(config, provider_config: dict) -> "LLMProviderClient": :param config: 4CAT config reader :param dict provider_config: Provider parameters, as configured in 4CAT - :return LLMProviderClient: + :return LLMProviderClient: A client object appropriate for the provider. """ # in-line import because we otherwise get circular import shenanigans from common.lib.llm.clients.ollama_client import OllamaClient from common.lib.llm.clients.litellm_client import LiteLLMClient - from common.lib.llm.clients.openai_client import LMStudioClient + from common.lib.llm.clients.openai_client import OpenAICompatibleClient from common.lib.llm.clients.thirdparty_client import ThirdPartyClient - for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): + for client_type in (OllamaClient, LiteLLMClient, OpenAICompatibleClient, ThirdPartyClient): if client_type.type == provider_config["type"]: return client_type(config, provider_config) @@ -44,6 +44,7 @@ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) - """ HTTP client for an LLM Provider + :param config: 4CAT config reader :param dict provider_config: Provider parameters, as configured in 4CAT :param int timeout: Default request timeout in seconds. :param Logger log: 4CAT log handler @@ -91,9 +92,10 @@ def get_status(self) -> bool | int: return False def list_models(self) -> list[dict]: - """List available models from the Ollama server. + """ + List available models from the LLM server. - :returns: List of model dicts, or ``[]`` on failure. + :returns: List of model dicts (un-mapped), or `[]` on failure. 
""" try: r = self._session.get( @@ -113,12 +115,10 @@ def list_models(self) -> list[dict]: def build_model_entry(self, meta: dict) -> dict: """ - Build a canonical ``llm.available_models`` entry for a model. + Build a canonical `llm.available_models` entry for a model. - :param model_id: Raw model identifier. - :param display_name: Human-readable name (from ``format_display_name``). - :param meta: ``/api/show`` response dict, or ``None`` if unavailable. - :returns: Dict ready to store under ``llm.available_models[model_id]``. + :param meta: `/api/show` response dict, or `None` if unavailable. + :returns: Dict ready to store under `llm.available_models[model_id]`. """ return { "id": self.get_global_model_id(meta), @@ -134,27 +134,28 @@ def get_model_card_url(self, meta: dict) -> str: """ Get a URL for a model card for a given model - :param meta: Model metadata + :param dict meta: Model metadata :return str: Model card URL (empty string if unavailable) """ return "" @abstractmethod def parse_supported_media_types(self, meta: dict) -> list[str]: - """Derive the media types a model supports from its Ollama metadata. + """ + Derive the media types a model supports from its Ollama metadata. - **Primary path**: reads ``meta["capabilities"]``: - - ``"completion"`` → ``"text"`` - - ``"vision"`` → ``"image"`` - - ``"embedding"`` → ``"embedding"`` + **Primary path**: reads `meta["capabilities"]`: + - `"completion"` → `"text"` + - `"vision"` → `"image"` + - `"embedding"` → `"embedding"` - **Fallback path** (used when capabilities are absent or only yield ``"text"``): - inspects GGUF ``model_info`` / ``details`` for vision signals and adds - ``"image"`` if any are found. + **Fallback path** (used when capabilities are absent or only yield `"text"`): + inspects GGUF `model_info` / `details` for vision signals and adds + `"image"` if any are found. - :param meta: ``/api/show`` response dict, or ``None``. + :param meta: `/api/show` response dict, or `None`. 
:returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + Returns `[]` when `meta` is `None` (unknown — callers should include the model, not block it). """ pass From d378f8bfdac6f5a8e6dd17e20a89efb96765863d Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 18:02:18 +0200 Subject: [PATCH 47/53] Address #563 --- common/lib/config_definition.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 0635a4c97..795faf02f 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -619,7 +619,9 @@ "type": UserInput.OPTION_TEXT, "default": "", "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema (e.g. 'https://').", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema " + "(e.g. 'https://'). When trying to connect to localhost while running 4CAT in Docker, use " + "'host.docker.internal' as a hostname instead.", }, "auth_header": { "type": UserInput.OPTION_TEXT, From cdb5e2e2e0de0f825a87ba72f29fc807f9477eef Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Mon, 8 Jun 2026 15:51:54 +0200 Subject: [PATCH 48/53] Fix migrate of provider settings --- helper-scripts/migrate/migrate-1.54-1.55.py | 43 ++++++++++++--------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py index 7a5074774..fc87c2d7e 100644 --- a/helper-scripts/migrate/migrate-1.54-1.55.py +++ b/helper-scripts/migrate/migrate-1.54-1.55.py @@ -37,29 +37,34 @@ print(" ...does not exist, filling with currently configured proviers") provider_type = db.fetchone("SELECT value FROM settings WHERE name = 'llm.provider_type'") providers = {} - if not provider_type: + if not provider_type or not provider_type.get("value"): 
print(" ...no provider currently configured") + else: - url = db.fetchone("SELECT value FROM settings WHERE name = 'llm.server'") - host = url.split("/")[2] if "://" in url else "localhost" - auth_header = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_type'") - auth_key = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_key'") - provider_name = db.fetchone("SELECT value FROM settings WHERE name = 'llm.host_name'") - provider_id = f"{provider_type}-{host}" + provider_type = provider_type["value"] + try: + url = db.fetchone("SELECT value FROM settings WHERE name = 'llm.server'")["value"] + host = url.split("/")[2] if "://" in url else "localhost" + auth_header = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_type'")["value"] + auth_key = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_key'")["value"] + provider_name = db.fetchone("SELECT value FROM settings WHERE name = 'llm.host_name'")["value"] + provider_id = f"{provider_type}-{host}" - # vLLM and LM Studio are both openai-like - provider_type = {"ollama": "ollama"}.get(provider_type, "openai-like") - providers[provider_id] = { - "name": provider_name, - "type": provider_type, - "url": url, - "auth_header": auth_header, - "auth_key": auth_key, - "_id": provider_id - } + # vLLM and LM Studio are both openai-like + provider_type = {"ollama": "ollama"}.get(provider_type, "openai-like") + providers[provider_id] = { + "name": provider_name, + "type": provider_type, + "url": url, + "auth_header": auth_header, + "auth_key": auth_key, + "_id": provider_id + } + except (TypeError, KeyError): + print(" ...provider configured but settings are incomplete, not migrating") - # add API models, always present - providers["thirdparty-models"] = { + # add API models, always present + providers["thirdparty-models"] = { "name": "Third-party models", "type": "api", "url": "", From b440536992784d646226dfeae22d5896f02ea48a Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: 
Mon, 8 Jun 2026 16:12:29 +0200 Subject: [PATCH 49/53] Better test error stack trace --- tests/test_modules.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tests/test_modules.py b/tests/test_modules.py index 3b9a82579..4e76e26ff 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -1,4 +1,6 @@ import traceback +from traceback import FrameSummary + import pytest import time import json @@ -219,7 +221,7 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data processor_class.get_options(parent_dataset=mock_dataset, config=mock_basic_config) except Exception as e: # Log the failure and add it to the failures list - trace = traceback.TracebackException.from_exception(e).stack[-1] + trace = get_trace(traceback.TracebackException.from_exception(e).stack) logger.error(f"Processor {processor_name} failed in get_options: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) @@ -232,12 +234,12 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data try: processor_class(logger, job=mock_job, queue=mock_job_queue, manager=None, modules=fourcat_modules) except Exception as e: - trace = traceback.TracebackException.from_exception(e).stack[-1] + trace = get_trace(traceback.TracebackException.from_exception(e).stack) logger.error(f"Processor {processor_name} failed in process(): {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) except Exception as e: - trace = traceback.TracebackException.from_exception(e).stack[-1] + trace = get_trace(traceback.TracebackException.from_exception(e).stack) logger.error(f"Processor {processor_name} failed while setting up: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) @@ -309,4 +311,19 @@ def test_dataset_finish_raises_on_double_finish(mock_dataset): mock_dataset.data["status_type"] = 
StatusType.WARNING.value with pytest.raises(RuntimeError, match="finished"): - mock_dataset.finish(5) \ No newline at end of file + mock_dataset.finish(5) + +def get_trace(stack) -> FrameSummary: + """ + Get relevant stack trace frame + + Skips over frames that are from (frozen) internal libraries + + :param stack: + :return FrameSummary: + """ + bit = stack.pop() + while stack and not bit.filename.startswith(str(PATH_ROOT)): + bit = stack.pop() + + return bit \ No newline at end of file From ab9c7ae7fdad8a23cb0130f915c5f552be6e5fdd Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Tue, 9 Jun 2026 15:09:39 +0200 Subject: [PATCH 50/53] git fix: no tracking extensions syslink (already added to .gitignore on main) --- extensions | 1 - 1 file changed, 1 deletion(-) delete mode 120000 extensions diff --git a/extensions b/extensions deleted file mode 120000 index c25d13e68..000000000 --- a/extensions +++ /dev/null @@ -1 +0,0 @@ -/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions \ No newline at end of file From 866765b7f130ffd289d65a7f4c4fce05c32b03b8 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Tue, 9 Jun 2026 15:26:45 +0200 Subject: [PATCH 51/53] fix allowed_models validation. 
--- processors/machine_learning/llm_prompter.py | 2 +- processors/machine_learning/prompt_compass.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 5b4080e76..82eefd4bc 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -1108,7 +1108,7 @@ def validate_query(query, request, config): raise QueryParametersException("You need to enter an API key when using third-party models.") allowed_models = LLMPrompter.get_model_library(config) - if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + if query["model"] not in chain(*[v.keys() for v in allowed_models.values()]): raise QueryParametersException(f"The '{query['model']}' model is not currently available.") # For media archive datasets, use_media won't be present in the query diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 4f4c7d0d8..7ef981140 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -250,7 +250,7 @@ def validate_query(query, request, config): :return: """ allowed_models = LLMPrompter.get_model_library(config) - if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + if query["model"] not in chain(*[v.keys() for v in allowed_models.values()]): raise QueryParametersException(f"The '{query['model']}' model is not currently available.") if not query["model"].startswith("local") and not query.get("api_key"): From 17e1bee0271f6ed84ce1a22ba10d5257b75e19d9 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Tue, 9 Jun 2026 16:19:29 +0200 Subject: [PATCH 52/53] fix: missing user supplied api_key (?) 
--- processors/machine_learning/llm_prompter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 82eefd4bc..dcdacb7fa 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -371,7 +371,7 @@ def process(self): self.dataset.delete_parameter("batches") # Set all variables through which we can reach the LLM - api_key = "" + api_key = self.parameters.get("api_key", "").strip() client_kwargs = {} # load model and provider metadata From f3f8d9c133c5726ec83e5826a0c71691cfa737b7 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Tue, 9 Jun 2026 16:54:51 +0200 Subject: [PATCH 53/53] fix migrate script; also db.upsert needs constraints --- common/lib/database.py | 13 ++++++++----- helper-scripts/migrate/migrate-1.54-1.55.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/common/lib/database.py b/common/lib/database.py index 8bae8d113..44814fd32 100644 --- a/common/lib/database.py +++ b/common/lib/database.py @@ -258,22 +258,25 @@ def insert(self, table, data, commit=True, safe=False, constraints=None, return_ cursor.close() return result - def upsert(self, table, data, commit=True, constraints=None): + def upsert(self, table, data, constraints, commit=True): """ Create or update database record If the record could not be inserted because of a constraint, the constraining record is updated instead. + `constraints` is required: an `ON CONFLICT ... DO UPDATE` statement is + only valid SQL when it specifies an inference target, so there is no + meaningful upsert without it. Use `insert()` if you do not need conflict + handling. + :param string table: Table to upsert record into :param dict data: Data to upsert + :param tuple constraints: The columns that should be used as the conflict + target, e.g. 
ON CONFLICT (name, lastname) DO UPDATE :param bool commit: Whether to commit after executing the query - :param tuple constraints: This tuple may contain the columns that should be used as a - constraint, e.g. ON CONFLICT (name, lastname) DO UPDATE :return int: Number of affected rows. Note that this may be unreliable if `commit` is `False` """ - if constraints is None: - constraints = [] # escape identifiers identifiers = [sql.Identifier(column) for column in data.keys()] diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py index fc87c2d7e..2de9015a4 100644 --- a/helper-scripts/migrate/migrate-1.54-1.55.py +++ b/helper-scripts/migrate/migrate-1.54-1.55.py @@ -80,6 +80,6 @@ db.execute("DELETE FROM settings WHERE name LIKE 'llm.%' AND name NOT IN ('llm.providers', 'llm.available_models', 'llm.access')") print(" Removing all known models (will be re-indexed on 4CAT restart)") -db.upsert("settings", {"name": "llm.available_models", "value": "{}"}) +db.upsert("settings", {"name": "llm.available_models", "value": "{}", "tag": ""}, constraints=["name", "tag"]) print(" - done!")