nf-neuro · AlexVCaron · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/.github/workflows/lint_component.yml b/.github/workflows/lint_component.yml
@@ -97,6 +97,7 @@ jobs:
       - name: Convert component meta.yml to markdown
         run: |
           poetry run nf-neuro-convert-${{ inputs.type }} \
+            --enhance-keywords \
             ${{ inputs.type }}s/nf-neuro/${{ inputs.component }} \
             ${{ github.sha }} \
             $(echo "${{ inputs.component }}" | sed 's/\//_/g').md
diff --git a/docs/astro/convert_module.py b/docs/astro/convert_module.py
@@ -13,6 +13,7 @@
     li,
     link,
 )
+from docs.astro.keywords import DEFAULT_MODEL, extract_keywords
 
 
 def _create_parser():
@@ -23,6 +24,14 @@ def _create_parser():
     p.add_argument('module_path', help='Path to the module')
     p.add_argument('current_commit_sha', help='Current commit sha')
     p.add_argument('output', help='Name of the output markdown file')
+    p.add_argument(
+        '--enhance-keywords', action='store_true', default=False,
+        help='Use an LLM via Ollama to extract additional SEO keywords'
+    )
+    p.add_argument(
+        '--llm-model', default=DEFAULT_MODEL, metavar='MODEL',
+        help=f'Ollama model used for keyword extraction (default: {DEFAULT_MODEL})'
+    )
 
     return p
 
@@ -51,6 +60,9 @@ def main():
     data["currentcommit"] = args.current_commit_sha
     data["currentdate"] = datetime.datetime.now().strftime("%Y-%m-%d")
 
+    if args.enhance_keywords:
+        data["keywords"] = extract_keywords(data, model=args.llm_model)
+
     template = env.get_template('module.md.jinja2')
     output_path = Path(args.output)
     output_path.write_text(template.render(**data))

diff --git a/docs/astro/convert_subworkflow.py b/docs/astro/convert_subworkflow.py
@@ -14,6 +14,7 @@
     link,
     sanitize_outside_codeblocks,
 )
+from docs.astro.keywords import DEFAULT_MODEL, extract_keywords
 
 
 DOC_URL_BASE = "https://nf-neuro.github.io"
@@ -71,6 +72,14 @@ def _create_parser():
     p.add_argument('subworkflow_path', help='Name of the subworkflow')
     p.add_argument('current_commit_sha', help='Current commit sha')
     p.add_argument('output', help='Name of the output markdown file')
+    p.add_argument(
+        '--enhance-keywords', action='store_true', default=False,
+        help='Use an LLM via Ollama to extract additional SEO keywords'
+    )
+    p.add_argument(
+        '--llm-model', default=DEFAULT_MODEL, metavar='MODEL',
+        help=f'Ollama model used for keyword extraction (default: {DEFAULT_MODEL})'
+    )
 
     return p
 
@@ -99,6 +108,9 @@ def main():
     data["currentcommit"] = args.current_commit_sha
     data["currentdate"] = datetime.datetime.now().strftime("%Y-%m-%d")
 
+    if args.enhance_keywords:
+        data["keywords"] = extract_keywords(data, model=args.llm_model)
+
     template = env.get_template('subworkflow.md.jinja2')
     output_path = Path(args.output)
     output_path.write_text(template.render(**data))

diff --git a/docs/astro/keywords.py b/docs/astro/keywords.py
@@ -0,0 +1,122 @@
+"""
+LLM-powered keyword extraction for nf-neuro documentation.
+
+Uses a model served by Ollama (``qwen3`` by default) to generate additional
+relevant keywords from meta.yml data, improving discoverability on the
+website and in search engines.
+"""
+
+import json
+import logging
+
+log = logging.getLogger(__name__)
+
+DEFAULT_MODEL = "qwen3"
+
+
+def _build_prompt(data):
+    """Build a keyword-extraction prompt from meta.yml data.
+
+    Null or missing fields are treated as empty and malformed tool entries
+    are skipped, so a sparse meta.yml yields a prompt instead of raising.
+    """
+    name = data.get("name") or ""
+    description = data.get("description") or ""
+    existing_keywords = [str(k) for k in (data.get("keywords") or [])]
+    tool_names, tool_descriptions = [], []
+    tools = [t for t in (data.get("tools") or []) if isinstance(t, dict)]
+    for tool in tools:
+        for tool_name, tool_meta in tool.items():
+            tool_names.append(str(tool_name))
+            meta = tool_meta if isinstance(tool_meta, dict) else {}
+            if meta.get("description") is not None:
+                text = str(meta["description"]).strip()
+                tool_descriptions.append(f"{tool_name}: {text}")
+    return (
+        "You are a scientific SEO expert specialising in neuroimaging and "
+        "bioinformatics software.\n\n"
+        "Given the following information about a Nextflow module for neuroimaging "
+        "data processing, extract a list of relevant keywords for SEO and search "
+        "discoverability. Focus on technical terms, neuroimaging concepts, "
+        "computational methods, data types, and scientific domains relevant to "
+        "the module.\n\n"
+        f"Module name: {name}\n"
+        f"Description: {description}\n"
+        f"Existing keywords: {', '.join(existing_keywords)}\n"
+        f"Tools used: {', '.join(tool_names)}\n"
+        f"Tool descriptions: {'; '.join(tool_descriptions)}\n\n"
+        "Return ONLY a JSON array of 5 to 15 additional keyword strings that are "
+        "NOT already present in the existing keywords list. Keywords should be "
+        "specific, relevant, and useful for search engines. Do not include "
+        "explanations or any other text outside the JSON array.\n\n"
+        'Example format: ["keyword1", "keyword2", "keyword3"]'
+    )
+
+
+def extract_keywords(data, model=DEFAULT_MODEL):
+    """Extract additional keywords from meta.yml data using an LLM via Ollama.
+
+    Calls the specified Ollama model to generate SEO-relevant keywords that
+    complement the existing ones defined in the meta.yml file. Falls back to
+    the original keyword list when Ollama is unavailable, the model call
+    fails, or the reply contains no parseable JSON array.
+
+    Parameters
+    ----------
+    data : dict
+        Parsed meta.yml data.
+    model : str, optional
+        Ollama model name to use for keyword extraction (default: ``qwen3``).
+
+    Returns
+    -------
+    list[str]
+        The original keywords followed by the new ones produced by the LLM,
+        in order, with case-insensitive duplicates dropped from the additions.
+    """
+    existing_keywords = data.get("keywords", []) or []
+
+    try:
+        import ollama
+
+        prompt = _build_prompt(data)
+        response = ollama.chat(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            options={"temperature": 0.2},
+        )
+        content = response.message.content.strip()
+
+        # Try every "[" as a candidate start and let the JSON decoder find the
+        # matching end. Unlike manual bracket counting this copes with "]"
+        # inside strings and skips bracketed prose (e.g. model reasoning) that
+        # precedes the real array.
+        decoder = json.JSONDecoder()
+        new_keywords = None
+        start = content.find("[")
+        while start != -1 and new_keywords is None:
+            try:
+                new_keywords, _ = decoder.raw_decode(content, start)
+            except ValueError:
+                start = content.find("[", start + 1)
+
+        if new_keywords is not None:
+            seen = {k.lower() for k in existing_keywords if isinstance(k, str)}
+            additional = []
+            for keyword in new_keywords:
+                if not isinstance(keyword, str):
+                    continue
+                keyword = keyword.strip()
+                if keyword and keyword.lower() not in seen:
+                    seen.add(keyword.lower())
+                    additional.append(keyword)
+            return existing_keywords + additional
+
+        log.warning(
+            "LLM response did not contain a parseable JSON keyword array; "
+            "using original keywords."
+        )
+    except Exception as exc:
+        log.warning("LLM keyword extraction failed (%s); using original keywords.", exc)
+
+    return existing_keywords
diff --git a/modules/nf-neuro/bundle/bundleparc/meta.yml b/modules/nf-neuro/bundle/bundleparc/meta.yml
@@ -1,5 +1,5 @@
 name: bundle_bundleparc
-description: process bundleparc
+description: Extract label maps of bundles using the bundleparc machine learning model.
 keywords:
   - Tractography
   - Bundleparc