diff --git a/.flox/env/manifest.lock b/.flox/env/manifest.lock index 18c2fe9a..dc0948a4 100644 --- a/.flox/env/manifest.lock +++ b/.flox/env/manifest.lock @@ -12,6 +12,9 @@ "findutils": { "pkg-path": "findutils" }, + "gnumake": { + "pkg-path": "gnumake" + }, "gnused": { "pkg-path": "gnused" }, @@ -662,6 +665,136 @@ "group": "toplevel", "priority": 5 }, + { + "attr_path": "gnumake", + "broken": false, + "derivation": "/nix/store/pf71f0ja823aanl6073z3slrpz2hpxr1-gnumake-4.4.1.drv", + "description": "Tool to control the generation of non-source files from sources", + "install_id": "gnumake", + "license": "GPL-3.0-or-later", + "locked_url": "https://github.com/flox/nixpkgs?rev=8eaee110344796db060382e15d3af0a9fc396e0e", + "name": "gnumake-4.4.1", + "pname": "gnumake", + "rev": "8eaee110344796db060382e15d3af0a9fc396e0e", + "rev_count": 864002, + "rev_date": "2025-09-19T10:20:10Z", + "scrape_date": "2025-09-21T05:38:43.319343Z", + "stabilities": [ + "unstable" + ], + "unfree": false, + "version": "4.4.1", + "outputs_to_install": [ + "man", + "out" + ], + "outputs": { + "info": "/nix/store/cwx5agxi3ig3gmbk4c4dn7df2krzlddy-gnumake-4.4.1-info", + "man": "/nix/store/a4aay80xgirjm8hk1rd142qcd1kkfps8-gnumake-4.4.1-man", + "out": "/nix/store/sjxx5p05vzq7xam62h21cyzkbyb1amvd-gnumake-4.4.1" + }, + "system": "aarch64-darwin", + "group": "toplevel", + "priority": 5 + }, + { + "attr_path": "gnumake", + "broken": false, + "derivation": "/nix/store/876aq0p8d0z7sfyjdawn9mrdfnv7n458-gnumake-4.4.1.drv", + "description": "Tool to control the generation of non-source files from sources", + "install_id": "gnumake", + "license": "GPL-3.0-or-later", + "locked_url": "https://github.com/flox/nixpkgs?rev=8eaee110344796db060382e15d3af0a9fc396e0e", + "name": "gnumake-4.4.1", + "pname": "gnumake", + "rev": "8eaee110344796db060382e15d3af0a9fc396e0e", + "rev_count": 864002, + "rev_date": "2025-09-19T10:20:10Z", + "scrape_date": "2025-09-21T06:10:24.182468Z", + "stabilities": [ + "unstable" + ], + 
"unfree": false, + "version": "4.4.1", + "outputs_to_install": [ + "man", + "out" + ], + "outputs": { + "debug": "/nix/store/j8lcp5zjdq0l0ipvji7s13vdc53nzcki-gnumake-4.4.1-debug", + "info": "/nix/store/8922q241lh4qbxd2g7jxsnjnkfmgap3z-gnumake-4.4.1-info", + "man": "/nix/store/0a4l47b9sqc28ssi5hsq21ivs2hmbzcp-gnumake-4.4.1-man", + "out": "/nix/store/9cns3585v908dwbf5nfqqjghv955ndrq-gnumake-4.4.1" + }, + "system": "aarch64-linux", + "group": "toplevel", + "priority": 5 + }, + { + "attr_path": "gnumake", + "broken": false, + "derivation": "/nix/store/xrm5hvv49gd5v31937jmr0vc6m8a1v64-gnumake-4.4.1.drv", + "description": "Tool to control the generation of non-source files from sources", + "install_id": "gnumake", + "license": "GPL-3.0-or-later", + "locked_url": "https://github.com/flox/nixpkgs?rev=8eaee110344796db060382e15d3af0a9fc396e0e", + "name": "gnumake-4.4.1", + "pname": "gnumake", + "rev": "8eaee110344796db060382e15d3af0a9fc396e0e", + "rev_count": 864002, + "rev_date": "2025-09-19T10:20:10Z", + "scrape_date": "2025-09-21T06:39:00.878032Z", + "stabilities": [ + "unstable" + ], + "unfree": false, + "version": "4.4.1", + "outputs_to_install": [ + "man", + "out" + ], + "outputs": { + "info": "/nix/store/451pi5y9s89na99pxv6jjvqa44r08dha-gnumake-4.4.1-info", + "man": "/nix/store/g7nffhgbmv3r01199lhp0qz741kvnlvf-gnumake-4.4.1-man", + "out": "/nix/store/fy063r4nqi1w79bklqhiv7ny0xwdqjp3-gnumake-4.4.1" + }, + "system": "x86_64-darwin", + "group": "toplevel", + "priority": 5 + }, + { + "attr_path": "gnumake", + "broken": false, + "derivation": "/nix/store/riz7jd6hvqpxzxgyhj76ianh96sxhvz4-gnumake-4.4.1.drv", + "description": "Tool to control the generation of non-source files from sources", + "install_id": "gnumake", + "license": "GPL-3.0-or-later", + "locked_url": "https://github.com/flox/nixpkgs?rev=8eaee110344796db060382e15d3af0a9fc396e0e", + "name": "gnumake-4.4.1", + "pname": "gnumake", + "rev": "8eaee110344796db060382e15d3af0a9fc396e0e", + "rev_count": 864002, + 
"rev_date": "2025-09-19T10:20:10Z", + "scrape_date": "2025-09-21T07:10:55.800436Z", + "stabilities": [ + "unstable" + ], + "unfree": false, + "version": "4.4.1", + "outputs_to_install": [ + "man", + "out" + ], + "outputs": { + "debug": "/nix/store/7vrxj6zy7y4a83d2q9585sxmcnkfs9ml-gnumake-4.4.1-debug", + "info": "/nix/store/m0ijkc5j3wdawh302pns9b45v9n6nq64-gnumake-4.4.1-info", + "man": "/nix/store/ha44mgbdcrzgah0dnjd28ax4hrdkc4mm-gnumake-4.4.1-man", + "out": "/nix/store/ahxj2q2mrl9z2k77ahqsl9j4zxq1wf84-gnumake-4.4.1" + }, + "system": "x86_64-linux", + "group": "toplevel", + "priority": 5 + }, { "attr_path": "gnused", "broken": false, diff --git a/.flox/env/manifest.toml b/.flox/env/manifest.toml index df77ae2b..feb8289a 100644 --- a/.flox/env/manifest.toml +++ b/.flox/env/manifest.toml @@ -15,6 +15,7 @@ lychee.pkg-path = "lychee" lychee.pkg-group = "lint" markdownlint-cli2.pkg-path = "markdownlint-cli2" markdownlint-cli2.pkg-group = "lint" +gnumake.pkg-path = "gnumake" [hook] on-activate = ''' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78b0f155..073c450f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,7 @@ jobs: with: command: | mkdocs build + python3 tools/generate_llms_txt.py ./site mkdir -p ./public/docs cp -R ./site/* ./public/docs/ chmod -R +w ./public/docs diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..f034cd9c --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +# Flox Documentation Makefile + +.PHONY: help dev build clean install + +# Default target +help: + @echo "Available targets:" + @echo " dev - Start development server with live reload" + @echo " build - Build static site and generate AI files" + @echo " clean - Clean build artifacts" + @echo " install - Install dependencies" + @echo " help - Show this help message" + +# Development server with live reload +dev: + @echo "Starting development server..." 
+ @echo "Site will be available at: http://127.0.0.1:8000" + @echo "Press Ctrl+C to stop" + mkdocs serve + +# Build static site and generate AI files +build: + @echo "Building static site..." + mkdocs build + @echo "Generating AI files..." + python3 tools/generate_llms_txt.py ./site + @echo "✅ Build complete! Site available in ./site/" + +# Clean build artifacts +clean: + @echo "Cleaning build artifacts..." + rm -rf site/ + rm -rf public/ + @echo "✅ Clean complete!" + +# Install dependencies (if needed) +install: + @echo "Installing dependencies..." + @if command -v poetry >/dev/null 2>&1; then \ + poetry install; \ + else \ + echo "Poetry not found. Please install dependencies manually."; \ + fi diff --git a/README.md b/README.md index 23a92d94..bca40f6c 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,53 @@ Live at: [flox.dev/docs](https://flox.dev/docs). ## Usage +### Quick Start + +```bash +# Activate the Flox environment +$ flox activate +✅ You are now using the environment 'floxdocs'. + +# Start development server +λ (floxdocs) $ make dev +``` + +The documentation will be available at `http://127.0.0.1:8000` with live reload. + +### Available Commands + +```bash +make dev # Start development server with live reload +make build # Build static site and generate AI files +make clean # Clean build artifacts +make help # Show all available commands ``` + +### Flox Native Usage + +You can still use the original Flox services approach: + +```bash $ flox activate ✅ You are now using the environment 'floxdocs'. λ (floxdocs) $ flox services start mkdocs ✅ Service 'mkdocs' started. - ``` + Once mkdocs service started you can preview the documentation at `https://127.0.0.1:8000`. 
+## AI-Friendly Documentation + +This documentation site automatically generates AI-friendly files for different use cases: + +- **`llms.txt`** - Optimized for AI agents with critical rules, workflows, and organized sitemap +- **`docs-content.txt`** - Comprehensive content for answer engines and RAG systems + +These files are automatically generated during the build process and are available at: +- `https://flox.dev/docs/llms.txt` +- `https://flox.dev/docs/docs-content.txt` ## Guidelines diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 00000000..6f687e1b --- /dev/null +++ b/tools/README.md @@ -0,0 +1,31 @@ +# Tools + +This directory contains build tools and utilities for the Flox documentation site. + +## Scripts + +### `generate_llms_txt.py` + +Generates AI-friendly documentation files from the built MkDocs site: + +- **`llms.txt`** - Agent-focused file with critical rules, workflows, and organized sitemap +- **`docs-content.txt`** - Answer engine file with comprehensive documentation content + +**Usage:** +```bash +mkdocs build +python3 tools/generate_llms_txt.py ./site +``` + +### `generate_llms_txt.sh` + +Convenience script for local development. Generates both AI files after a MkDocs build. + +**Usage:** +```bash +mkdocs build +./tools/generate_llms_txt.sh +``` + +## Integration + +These scripts are automatically run during CI builds in `.github/workflows/ci.yml` to ensure the AI files are always up-to-date with the documentation content. diff --git a/tools/generate_llms_txt.py b/tools/generate_llms_txt.py new file mode 100755 index 00000000..89c46e61 --- /dev/null +++ b/tools/generate_llms_txt.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python3 +""" +Generate llms.txt from the built MkDocs site. +This script scans the built site directory and creates a comprehensive llms.txt file. 
+""" + +import os +import re +import sys +from pathlib import Path +from urllib.parse import unquote + + +def normalize_url(path: str, base_url: str = "https://flox.dev/docs") -> str: + """Convert file path to URL.""" + # Remove leading/trailing slashes + path = path.strip('/') + if not path or path == 'index.html': + return base_url + # Remove .html extension + path = path.replace('.html', '') + return f"{base_url}/{path}" + + +def extract_title_from_html(html_content: str) -> str: + """Extract page title from HTML content.""" + # Check if this is a redirect page + if 'Redirecting...' in html_content or 'redirect' in html_content.lower(): + return None # Signal this is a redirect page + + # Try to find the first h1 tag + h1_match = re.search(r']*>(.*?)', html_content, re.DOTALL) + if h1_match: + title = h1_match.group(1).strip() + # Remove markdown tags and clean up + title = re.sub(r'<[^>]+>', '', title) + return title + + # Fallback to title tag + title_match = re.search(r'(.*?)', html_content, re.DOTALL) + if title_match: + title = title_match.group(1).strip() + title = title.replace(' - Flox Docs', '').strip() + return title + + return "Untitled" + + +def extract_description_from_html(html_content: str) -> str: + """Extract meta description or first paragraph.""" + # Try to find meta description + desc_match = re.search(r']*>.*?]*>(.*?)

', html_content, re.DOTALL) + if para_match: + desc = para_match.group(1).strip() + # Remove HTML tags + desc = re.sub(r'<[^>]+>', '', desc) + # Limit length + if len(desc) > 200: + desc = desc[:200] + "..." + return desc + + return "" + + +def categorize_page(url: str, title: str) -> str: + """Categorize pages for better organization.""" + if '/concepts/' in url: + return 'concepts' + elif '/tutorials/' in url: + return 'tutorials' + elif '/man/' in url: + return 'manual' + elif '/languages/' in url: + return 'languages' + elif '/install-flox/' in url: + return 'installation' + elif '/customer/' in url: + return 'customer' + elif '/snippets/' in url: + return 'snippets' + elif url.endswith('/docs') or url.endswith('/docs/'): + return 'overview' + else: + return 'other' + + +def get_page_description(url: str, title: str) -> str: + """Get a helpful description for common pages.""" + descriptions = { + 'concepts/environments': 'Understanding Flox environments and how they work', + 'concepts/activation': 'How to activate and use Flox environments', + 'concepts/floxhub': 'Understanding FloxHub package registry and sharing', + 'concepts/generations': 'Environment snapshots and version management', + 'concepts/packages-and-catalog': 'Package management and the Flox catalog', + 'concepts/services': 'Running services within Flox environments', + 'concepts/composition': 'Combining and layering multiple environments', + 'concepts/builds': 'Building packages and environments', + 'concepts/publishing': 'Publishing packages to FloxHub', + 'tutorials/creating-environments': 'Step-by-step guide to create your first environment', + 'tutorials/sharing-environments': 'How to share environments with team members', + 'tutorials/customizing-environments': 'Customizing shell environment and behavior', + 'tutorials/build-and-publish': 'Building and publishing custom packages', + 'tutorials/ci-cd': 'Using Flox in continuous integration pipelines', + 'tutorials/composition': 'Reusing and 
combining developer environments', + 'tutorials/multi-arch-environments': 'Cross-platform environment design', + 'tutorials/cuda': 'Using CUDA with Flox environments', + 'tutorials/migrations/homebrew': 'Migrating from Homebrew to Flox', + 'tutorials/migrations/nvm': 'Migrating from Node Version Manager to Flox', + 'languages/python': 'Python development with Flox', + 'languages/nodejs': 'Node.js and JavaScript development', + 'languages/go': 'Go development with Flox', + 'languages/rust': 'Rust development with Flox', + 'languages/c': 'C/C++ development with Flox', + 'languages/jvm': 'Java and JVM development', + 'languages/ruby': 'Ruby development with Flox', + 'install-flox/install': 'Installation instructions for Flox', + 'install-flox/uninstall': 'How to uninstall Flox', + 'flox-5-minutes': 'Quick start guide to get up and running', + } + + # Extract the key part of the URL + key = url.replace('https://flox.dev/docs/', '').replace('/index', '') + return descriptions.get(key, '') + + +def get_site_structure(site_dir: Path) -> list: + """Get all pages from the built site.""" + pages = [] + html_files = list(site_dir.rglob('*.html')) + + for html_file in sorted(html_files): + # Skip generated JS/search files + if html_file.name in ['search.html', '404.html', 'sitemap.xml']: + continue + + rel_path = html_file.relative_to(site_dir) + url = normalize_url(str(rel_path)) + + try: + with open(html_file, 'r', encoding='utf-8') as f: + content = f.read() + + title = extract_title_from_html(content) + + # Skip redirect pages + if title is None: + continue + + description = extract_description_from_html(content) + + description = get_page_description(url, title) + category = categorize_page(url, title) + + pages.append({ + 'url': url, + 'title': title, + 'path': str(rel_path), + 'description': description, + 'category': category + }) + except Exception as e: + print(f"Error processing {html_file}: {e}", file=sys.stderr) + + return pages + + +def 
extract_page_content(html_content: str) -> str: + """Extract main content from HTML for answer engine.""" + # Remove script and style elements + content = re.sub(r']*>.*?', '', html_content, flags=re.DOTALL) + content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) + + # Extract main content area + main_match = re.search(r']*>(.*?)', content, re.DOTALL) + if main_match: + content = main_match.group(1) + + # Remove unwanted elements + content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) + content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) + content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) + + # Remove license/copyright text + content = re.sub(r'Permission is hereby granted.*?DEALINGS IN THE SOFTWARE\.', '', content, flags=re.DOTALL) + content = re.sub(r'Copyright.*?All rights reserved\.', '', content, flags=re.DOTALL) + content = re.sub(r'THE SOFTWARE IS PROVIDED.*?DEALINGS IN THE SOFTWARE\.', '', content, flags=re.DOTALL) + content = re.sub(r'-->.*?Have questions\?', '', content, flags=re.DOTALL) + + # Remove HTML tags but preserve structure + content = re.sub(r']*>(.*?)', r'\n\n#\1 \2\n', content) + content = re.sub(r']*>(.*?)

', r'\1\n\n', content) + content = re.sub(r']*>(.*?)', r'- \1\n', content) + content = re.sub(r']*>(.*?)', r'`\1`', content) + content = re.sub(r']*>]*>(.*?)', r'```\n\1\n```', content, flags=re.DOTALL) + content = re.sub(r'<[^>]+>', '', content) + + # Clean up whitespace and remove empty lines + content = re.sub(r'\n\s*\n\s*\n', '\n\n', content) + content = re.sub(r'^\s*\n', '', content, flags=re.MULTILINE) + content = content.strip() + + # Only return substantial content (more than 100 chars) + if len(content) < 100: + return "" + + return content + + +def generate_llms_txt(site_dir: Path, output_path: Path): + """Generate llms.txt file for agents.""" + pages = get_site_structure(site_dir) + + # Read existing llms.txt to get the header + existing_llms = Path('docs/llms.txt') + header = "" + if existing_llms.exists(): + with open(existing_llms, 'r') as f: + lines = f.readlines() + # Extract header (everything before the Sitemap section) + for i, line in enumerate(lines): + if line.startswith('## Sitemap'): + header = ''.join(lines[:i]) + break + header = ''.join(lines) + + # Add key terminology section + terminology = """ +## Key Terms +- **Environment**: A reproducible development environment with specific packages and configurations +- **Manifest**: A declarative configuration file (manifest.toml) defining an environment's packages and settings +- **Generation**: A snapshot of an environment at a specific point in time, allowing rollbacks +- **FloxHub**: The package registry and sharing platform for Flox environments +- **Activation**: Running commands within a Flox environment's context +- **Catalog**: The collection of available packages that can be installed +- **Services**: Long-running processes defined in the manifest that can be managed by Flox + +## Quick Reference +### Common Workflows +- **New Project**: `flox init -d .` → `flox install -d . ` → `flox activate -d . 
-- ` +- **Multi-environment Project**: `flox init -d backend` + `flox init -d frontend` +- **Sharing Environment**: `flox push` → `flox pull` on another machine +- **Package Management**: `flox search ` → `flox show ` → `flox install -d . ` +- **Service Management**: Define in manifest → `flox services start ` → `flox services status` + +""" + + # Group pages by category + categories = {} + for page in pages: + cat = page['category'] + if cat not in categories: + categories[cat] = [] + categories[cat].append(page) + + # Generate organized sitemap + sitemap = ["## Documentation Structure\n"] + + # Define category order and titles + category_order = [ + ('overview', 'Overview'), + ('installation', 'Installation'), + ('tutorials', 'Tutorials & Getting Started'), + ('concepts', 'Core Concepts'), + ('languages', 'Language-Specific Guides'), + ('manual', 'Command Reference'), + ('customer', 'Customer Resources'), + ('snippets', 'Code Snippets'), + ('other', 'Other') + ] + + for cat_key, cat_title in category_order: + if cat_key in categories: + sitemap.append(f"### {cat_title}\n") + for page in sorted(categories[cat_key], key=lambda x: x['title']): + if page['description']: + sitemap.append(f"- [{page['title']}]({page['url']}) - {page['description']}\n") + else: + sitemap.append(f"- [{page['title']}]({page['url']})\n") + sitemap.append("\n") + + # Write the complete llms.txt + content = header + terminology + ''.join(sitemap) + + with open(output_path, 'w') as f: + f.write(content) + + print(f"Generated {output_path} with {len(pages)} pages organized into {len(categories)} categories") + + +def generate_docs_content(site_dir: Path, output_path: Path): + """Generate docs-content.txt file for answer engines.""" + pages = get_site_structure(site_dir) + + # Start with introduction + content = """# Flox Documentation Content + +This file contains comprehensive documentation content for answer engines and RAG systems. 
+ +""" + + # Group pages by category and add content + categories = {} + for page in pages: + cat = page['category'] + if cat not in categories: + categories[cat] = [] + categories[cat].append(page) + + # Define category order and titles + category_order = [ + ('overview', 'Overview'), + ('installation', 'Installation'), + ('tutorials', 'Tutorials & Getting Started'), + ('concepts', 'Core Concepts'), + ('languages', 'Language-Specific Guides'), + ('manual', 'Command Reference'), + ('customer', 'Customer Resources'), + ('snippets', 'Code Snippets'), + ('other', 'Other') + ] + + for cat_key, cat_title in category_order: + if cat_key in categories: + content += f"\n## {cat_title}\n\n" + + for page in sorted(categories[cat_key], key=lambda x: x['title']): + content += f"### {page['title']}\n" + content += f"URL: {page['url']}\n\n" + + if page['description']: + content += f"**Description**: {page['description']}\n\n" + + # Try to extract content from the HTML file + html_file = site_dir / page['path'] + try: + with open(html_file, 'r', encoding='utf-8') as f: + html_content = f.read() + + page_content = extract_page_content(html_content) + if page_content and len(page_content) > 200: # Only include substantial content + content += page_content + "\n\n" + else: + content += f"*Content available at: {page['url']}*\n\n" + except Exception as e: + content += f"*Content available at: {page['url']}*\n\n" + + content += "---\n\n" + + # Write the complete docs-content.txt + with open(output_path, 'w') as f: + f.write(content) + + print(f"Generated {output_path} with comprehensive documentation content") + + +def main(): + if len(sys.argv) < 2: + print("Usage: generate_llms_txt.py ") + sys.exit(1) + + site_dir = Path(sys.argv[1]) + + if not site_dir.exists(): + print(f"Error: Directory {site_dir} does not exist") + sys.exit(1) + + # Generate files in temp location first, then copy to site root + import tempfile + import shutil + + with tempfile.TemporaryDirectory() as temp_dir: 
+ temp_path = Path(temp_dir) + llms_temp = temp_path / 'llms.txt' + docs_content_temp = temp_path / 'docs-content.txt' + + print("Generating files for AI systems...") + generate_llms_txt(site_dir, llms_temp) + generate_docs_content(site_dir, docs_content_temp) + + # Copy to site root + llms_final = site_dir / 'llms.txt' + docs_content_final = site_dir / 'docs-content.txt' + + shutil.copy2(llms_temp, llms_final) + shutil.copy2(docs_content_temp, docs_content_final) + + print("✅ Generated both llms.txt (for agents) and docs-content.txt (for answer engines)") + + +if __name__ == '__main__': + main() + + diff --git a/tools/generate_llms_txt.sh b/tools/generate_llms_txt.sh new file mode 100755 index 00000000..9acaacac --- /dev/null +++ b/tools/generate_llms_txt.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# Generate AI files for local development +# This script should be run after mkdocs builds the site + +set -euo pipefail + +# Ensure site directory exists +if [ ! -d "site" ]; then + echo "Error: site directory not found. Please run 'mkdocs build' first." + exit 1 +fi + +# Generate both AI files +python3 tools/generate_llms_txt.py ./site + +echo "✅ Generated both llms.txt (for agents) and docs-content.txt (for answer engines)" + +