diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..a4dc00d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,57 @@ +name: Bug Report +description: Create a report to help us reproduce and fix a bug. +title: "fix: [Short description of the bug]" +labels: ["bug"] +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to report a bug. Please ensure you have read the `CONTRIBUTING.md` and checked for existing duplicate issues before proceeding. + + - type: checkboxes + id: pre-flight + attributes: + label: Pre-flight checks + options: + - label: I have checked existing open and closed issues for duplicates. + required: true + - label: I am using the latest version of the action/script. + required: true + + - type: textarea + id: environment + attributes: + label: Environment Details + description: OS version, Python version, and execution context (local vs GitHub Actions). + placeholder: e.g., Ubuntu-latest (GitHub Actions), Python 3.11 + validations: + required: true + + - type: textarea + id: steps-to-reproduce + attributes: + label: Steps to Reproduce + description: Provide a clear, step-by-step algorithm to trigger the bug. + placeholder: | + 1. Push commit with the following payload... + 2. Action triggers process_event.py... + 3. Fails with error... + validations: + required: true + + - type: textarea + id: expected-behavior + attributes: + label: Expected Behavior + description: What did you expect the script/action to do? + validations: + required: true + + - type: textarea + id: actual-behavior + attributes: + label: Actual Behavior / Traceback + description: What actually occurred? Paste stack traces or logs here. 
+ render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..8440bbb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,35 @@ +name: Feature Request +description: Suggest an enhancement or a new feature for the LLM pipeline. +title: "feat: [Short description of the feature]" +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Thank you for suggesting an improvement! Please provide detailed context so we can understand the value of this feature. + + - type: textarea + id: problem-context + attributes: + label: Problem Context + description: Explain the problem your proposed enhancement solves. Is your feature request related to a specific frustration? + placeholder: I'm always frustrated when... + validations: + required: true + + - type: textarea + id: proposed-solution + attributes: + label: Proposed Solution & Use Cases + description: Describe your ideal solution and real-world scenarios where this feature would be beneficial. + placeholder: It would be great if the action could automatically... + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: Have you considered any alternative solutions or workarounds? 
+ validations: + required: false diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..30047bb --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + labels: + - "dependencies" + - "python" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + labels: + - "dependencies" + - "actions" diff --git a/.github/labels.yml b/.github/labels.yml new file mode 100644 index 0000000..6d44895 --- /dev/null +++ b/.github/labels.yml @@ -0,0 +1,82 @@ +# Severity labels — used by trigger_action.py (7-level scale) +- name: "severity: critical" + color: "B60205" + description: "Production-breaking, security vulnerabilities, data loss risk" + +- name: "severity: high" + color: "D93F0B" + description: "Core logic changes, API modifications, breaking changes" + +- name: "severity: elevated" + color: "E36209" + description: "New features or modules, large refactors, external integrations" + +- name: "severity: medium" + color: "E4E669" + description: "New helpers, CI/CD changes, config changes, dependency updates" + +- name: "severity: moderate" + color: "B4D455" + description: "Minor feature additions, validation improvements, small behavioral changes" + +- name: "severity: low" + color: "0E8A16" + description: "Documentation updates, cosmetic changes, formatting fixes" + +- name: "severity: informational" + color: "C5DEF5" + description: "Trivial changes — typos, whitespace, comment rewording" + +# Type labels — standard issue/PR classification +- name: "bug" + color: "D73A4A" + description: "Something isn't working" + +- name: "enhancement" + color: "A2EEEF" + description: "New feature or request" + +- name: "documentation" + color: "0075CA" + description: "Improvements or additions to documentation" + +- name: "security" + color: "E11D48" + description: "Security vulnerability or audit finding" + 
+- name: "performance" + color: "F9D0C4" + description: "Performance or optimization issue" + +- name: "refactor" + color: "D4C5F9" + description: "Code quality or structural improvement" + +- name: "dependencies" + color: "0366D6" + description: "Dependency update (Dependabot or manual)" + +# Workflow labels +- name: "ai-generated" + color: "6F42C1" + description: "Automatically created by the AI Issue Generator" + +- name: "needs triage" + color: "FBCA04" + description: "Awaiting first review and classification" + +- name: "wontfix" + color: "FFFFFF" + description: "This will not be worked on" + +- name: "duplicate" + color: "CFD3D7" + description: "This issue or pull request already exists" + +- name: "good first issue" + color: "7057FF" + description: "Good for newcomers" + +- name: "help wanted" + color: "008672" + description: "Extra attention is needed" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..40fcbf3 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,23 @@ +## Description +## Related Issue(s) +Resolves # + +## Type of Change +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update +- [ ] Refactoring / Technical Debt + +## Testing Performed +- [ ] Local execution of `process_event.py` +- [ ] Verified JSON output structure from Gemini API +- [ ] Tested GitHub Action workflow trigger (dry-run) +- [ ] Other: + +## Checklist +- [ ] My code follows the code style of this project (PEP-8). +- [ ] I have performed a self-review of my own code. +- [ ] I have commented my code, particularly in hard-to-understand areas. +- [ ] I have made corresponding changes to the documentation (README.md, etc.). +- [ ] My changes generate no new warnings or errors in the CI pipeline. 
diff --git a/.github/workflows/ai-issue.yml b/.github/workflows/ai-issue.yml new file mode 100644 index 0000000..30d9ef4 --- /dev/null +++ b/.github/workflows/ai-issue.yml @@ -0,0 +1,49 @@ +name: Llama Auto-Issue Generator + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, labeled] + +permissions: read-all + +concurrency: + group: ai-issue-${{ github.ref }} + cancel-in-progress: false + +jobs: + analyze_code: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + issues: write + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install --no-cache-dir PyGithub==2.5.0 requests==2.32.3 + + - name: Run AI Analysis + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPOSITORY: ${{ github.repository }} + EVENT_NAME: ${{ github.event_name }} + COMMIT_SHA: ${{ github.sha }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GH_MODELS_TOKEN: ${{ secrets.GH_MODELS_TOKEN }} + ALLOWED_USER: ${{ secrets.ALLOWED_USER }} + run: python "trigger action/trigger_action.py" diff --git a/.github/workflows/dependabot-auto-merge.yml b/.github/workflows/dependabot-auto-merge.yml new file mode 100644 index 0000000..c2652c7 --- /dev/null +++ b/.github/workflows/dependabot-auto-merge.yml @@ -0,0 +1,45 @@ +name: Dependabot Auto Merge + +on: + pull_request: + types: [opened, reopened, synchronize, ready_for_review] + +permissions: read-all + +concurrency: + group: dependabot-merge-${{ github.event.pull_request.number }} + cancel-in-progress: false + +jobs: + automerge: + if: github.event.pull_request.user.login == 'dependabot[bot]' + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + contents: write + pull-requests: write + + steps: + - name: Fetch Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v2 + 
with: + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Approve safe non-breaking updates + if: | + github.event.pull_request.draft == false && + (steps.metadata.outputs.update-type == 'version-update:semver-patch' || + steps.metadata.outputs.update-type == 'version-update:semver-minor') + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh pr review "${{ github.event.pull_request.number }}" --approve + + - name: Enable auto-merge for safe non-breaking updates + if: | + github.event.pull_request.draft == false && + (steps.metadata.outputs.update-type == 'version-update:semver-patch' || + steps.metadata.outputs.update-type == 'version-update:semver-minor') + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh pr merge "${{ github.event.pull_request.number }}" --auto --merge diff --git a/.github/workflows/label-sync.yml b/.github/workflows/label-sync.yml new file mode 100644 index 0000000..2d7a5e4 --- /dev/null +++ b/.github/workflows/label-sync.yml @@ -0,0 +1,28 @@ +name: Label Sync + +on: + push: + branches: + - main + paths: + - '.github/labels.yml' + workflow_dispatch: + +permissions: read-all + +jobs: + sync: + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + issues: write + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Sync labels + uses: EndBug/add-and-update-labels@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + config-file: .github/labels.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..b1274c6 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,59 @@ +name: Lint and Static Checks + +on: + pull_request: + workflow_dispatch: + +concurrency: + group: lint-${{ github.ref }} + cancel-in-progress: true + +permissions: read-all + +jobs: + lint: + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + ref: ${{ github.head_ref }} + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + 
node-version: '20' + + - name: Install dependencies + run: | + if [ ! -f package.json ]; then + echo "No package.json found. Skipping npm install." + exit 0 + fi + if [ -f package-lock.json ]; then + echo "package-lock.json found. Running fast install (npm ci)..." + npm ci + else + echo "package-lock.json NOT found. Generating it (npm install)..." + npm install --no-audit --no-fund + fi + + - name: Run project lint script when available + run: | + if [ -f package.json ]; then + npm run lint --if-present + else + echo "No package.json found. Skipping lint." + fi + + - name: Validate JavaScript syntax + run: | + if [ -d api ]; then + while IFS= read -r -d '' file; do + node --check "$file" + done < <(find api -type f -name '*.js' -print0) + else + echo "No api/ directory found. Skipping JavaScript syntax validation." + fi diff --git a/.github/workflows/sast.yml b/.github/workflows/sast.yml new file mode 100644 index 0000000..89703ec --- /dev/null +++ b/.github/workflows/sast.yml @@ -0,0 +1,48 @@ +name: SAST CodeQL Scan + +on: + pull_request: + workflow_dispatch: + schedule: + - cron: '17 4 * * 1' + +permissions: read-all + +concurrency: + group: codeql-${{ github.ref }} + cancel-in-progress: false + +jobs: + codeql: + name: Analyze (${{ matrix.language }}) + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: + - python + - javascript + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + queries: security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v4 + + - name: Analyze + uses: github/codeql-action/analyze@v4 + with: + category: /language:${{ matrix.language }} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..02d02a4
--- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,35 @@ +name: OpenSSF Scorecard + +on: + push: + branches: + - main + schedule: + - cron: '30 5 * * 1' + workflow_dispatch: + +permissions: read-all + +jobs: + scorecard: + name: Scorecard analysis + runs-on: ubuntu-latest + timeout-minutes: 20 + permissions: + security-events: write + id-token: write + actions: read + contents: read + + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Run analysis + uses: ossf/scorecard-action@v2.4.3 + with: + results_file: scorecard-results.sarif + results_format: sarif + publish_results: true diff --git a/trigger action/trigger_action.py b/trigger action/trigger_action.py new file mode 100644 index 0000000..0386ee1 --- /dev/null +++ b/trigger action/trigger_action.py @@ -0,0 +1,510 @@ +"""Create GitHub issues and PR comments from AI-generated change analysis. +https://github.com/assembly-automation-hub/repo-governance + +This module runs inside GitHub Actions and inspects either push or pull request +events. It gathers the relevant diff, sends the change summary to a hosted model, +and uses the structured JSON response to open a GitHub issue and optionally post +a pull request comment. + +The entry-point is the module itself — there are no classes. Execution proceeds +top-to-bottom: environment variables are read, the diff is collected, a +role-specific prompt is assembled based on the event's trigger labels, the +hosted model is called, and finally a GitHub issue (and optional PR comment) +is created. + +Attributes: + gh_token (str | None): GitHub personal access token sourced from the + ``GITHUB_TOKEN`` environment variable. Used to authenticate all + PyGithub API calls. + model_token (str | None): Bearer token for the Azure-hosted model endpoint, + sourced from the ``GH_MODELS_TOKEN`` environment variable. 
+ repo_name (str | None): The ``owner/repo`` identifier of the target + repository, sourced from the ``REPOSITORY`` environment variable. + event_name (str | None): The GitHub Actions event that triggered this + workflow run (``"push"`` or ``"pull_request"``), sourced from the + ``EVENT_NAME`` environment variable. + allowed_users (list[str]): Lowercase login names of GitHub users whose + events are eligible for analysis. Parsed from the comma-separated + ``ALLOWED_USER`` environment variable. + MODEL_NAME (str): The model identifier used in every inference request. + ENDPOINT (str): The Azure inference API URL for chat completions. + diff_text (str): Accumulated file-patch text collected from the triggering + commit or pull request. Capped at 10 000 characters for push events and + 80 000 characters for pull-request events to stay within model limits. + event_context (str): A short human-readable description of the event (e.g. + commit message or PR title/body) prepended to every model prompt. + author_login (str): Lowercase GitHub login of the commit author or PR + author used for allow-list enforcement. + trigger_labels (list[str]): Lowercase label strings extracted from the + commit message brackets ``[label]`` or from the PR's applied labels. + Drive prompt-role selection later in the module. + dedup_key (str): A stable identifier (e.g. ``"PR #42"`` or + ``"commit:a1b2c3d"``) embedded in every generated issue body so that + duplicate issues can be detected on subsequent runs. + pr_ref (github.PullRequest.PullRequest | None): A live PyGithub pull + request object retained for posting the summary comment, or ``None`` + when the triggering event is a push. +""" + +import os +import json +import re +import time +import requests +from github import Github, Auth + +# --------------------------------------------------------------------------- +# Environment — read once at module level so all functions share the values. 
# ---------------------------------------------------------------------------
gh_token = os.environ.get("GITHUB_TOKEN")
model_token = os.environ.get("GH_MODELS_TOKEN")
repo_name = os.environ.get("REPOSITORY")
event_name = os.environ.get("EVENT_NAME")

# Comma-separated allow-list of logins. Blank entries (unset variable,
# trailing comma) are dropped so the list never contains "".
allowed_users = [
    login.strip().lower()
    for login in os.environ.get("ALLOWED_USER", "").split(",")
    if login.strip()
]

MODEL_NAME = "Llama-3.3-70B-Instruct"
ENDPOINT = "https://models.inference.ai.azure.com/chat/completions"

# Fail fast with a readable message: without these two values the PyGithub
# calls below cannot succeed and would otherwise die with an opaque error.
if not gh_token or not repo_name:
    raise SystemExit(
        "GITHUB_TOKEN and REPOSITORY environment variables are required."
    )

# Authenticate once; the ``repo`` object is reused throughout.
auth = Auth.Token(gh_token)
gh = Github(auth=auth)
repo = gh.get_repo(repo_name)

# ---------------------------------------------------------------------------
# Mutable state populated by the event-routing block below.
# ---------------------------------------------------------------------------
diff_text = ""        # accumulated "File/Patch" text sent to the model
event_context = ""    # commit message or PR title/body
author_login = ""     # lowercase login checked against ``allowed_users``
trigger_labels = []   # lowercase labels used to pick the prompt role
dedup_key = ""        # marker embedded in issue bodies to detect duplicates
pr_ref = None         # PyGithub pull request, set only for pull_request events
changed_files = []    # filenames seen while collecting the diff

# ---------------------------------------------------------------------------
# Event routing — collect the diff and metadata for push vs pull_request.
# ---------------------------------------------------------------------------
_TRUNCATION_MARKER = "\n[Diff truncated...]"


def _collect_diff(files, limit: int):
    """Gather filenames and patch text from *files*, capping the text at *limit*.

    Args:
        files: Iterable of objects exposing ``filename`` and ``patch``
            (here: the file entries of a commit or pull request).
        limit: Maximum number of characters of patch text to keep, not
            counting the truncation marker.

    Returns:
        A ``(names, text)`` tuple. ``names`` lists every file visited up to
        and including the one that hit the limit; ``text`` is the joined
        ``File:/Patch:`` entries, cut at *limit* and followed by the
        truncation marker when the cap was reached.
    """
    names = []
    parts = []
    size = 0
    for changed in files:
        names.append(changed.filename)
        # ``patch`` may be None (presumably binary or oversized files);
        # avoid rendering the literal string "None" into the prompt.
        patch = changed.patch or "[no textual patch available]"
        entry = f"File: {changed.filename}\nPatch:\n{patch}\n\n"
        if size + len(entry) > limit:
            # Keep only what still fits so one huge patch cannot blow the cap.
            parts.append(entry[: max(limit - size, 0)])
            parts.append(_TRUNCATION_MARKER)
            break
        parts.append(entry)
        size += len(entry)
    return names, "".join(parts)


if event_name == "push":
    commit_sha = os.environ.get("COMMIT_SHA")
    commit = repo.get_commit(commit_sha)

    # Commits with more than one parent (merges) and commits without a
    # linked GitHub author are not analysed.
    if len(commit.parents) > 1:
        raise SystemExit(0)
    if not commit.author:
        raise SystemExit(0)

    author_login = commit.author.login.strip().lower()
    if author_login not in allowed_users:
        raise SystemExit(0)

    commit_message = commit.commit.message

    # A "(#123)" suffix in the message ties the commit to that PR number so
    # the push shares its dedup key with the pull_request run.
    pr_match = re.search(r'\(#(\d+)\)', commit_message)
    if pr_match:
        dedup_key = f"PR #{pr_match.group(1)}"
    else:
        dedup_key = f"commit:{commit_sha[:7]}"

    event_context = f"Commit Message: {commit_message}"
    trigger_labels = [m.lower() for m in re.findall(r'\[(.*?)\]', commit_message)]

    changed_files, diff_text = _collect_diff(commit.files, 10000)

elif event_name == "pull_request":
    pr_number_raw = os.environ.get("PR_NUMBER") or ""
    if not pr_number_raw.isdigit():
        raise SystemExit("PR_NUMBER environment variable is missing or not a number.")
    pr_number = int(pr_number_raw)
    pr = repo.get_pull(pr_number)
    author_login = pr.user.login.strip().lower()
    if author_login not in allowed_users:
        raise SystemExit(0)

    pr_ref = pr
    dedup_key = f"PR #{pr_number}"
    # ``pr.body`` is None for PRs without a description.
    event_context = f"PR Title: {pr.title}\nPR Body: {pr.body or ''}"
    trigger_labels = [label.name.lower() for label in pr.labels]

    changed_files, diff_text = _collect_diff(pr.get_files(), 80000)
else:
    raise SystemExit(0)

if len(diff_text.strip()) < 50:
    print("Diff too small to analyze. Skipping.")
    raise SystemExit(0)

# Skip when an earlier run already filed an issue for this event. The
# trailing look-ahead stops "PR #4" from matching a body that says "PR #42".
_dedup_pattern = re.compile(re.escape(dedup_key) + r"(?![0-9A-Za-z])")
for existing in repo.get_issues(state="all"):
    if _dedup_pattern.search(existing.body or ""):
        print(f"Issue for {dedup_key} already exists (#{existing.number}), skipping.")
        raise SystemExit(0)


def was_already_closed(title_keyword: str) -> bool:
    """Return whether a closed issue title contains *title_keyword*.

    The comparison is a case-insensitive substring test. An empty or
    whitespace-only keyword never matches; otherwise it would match every
    closed issue.

    Args:
        title_keyword: Text to look for in closed issue titles.

    Returns:
        True when a closed issue with a matching title exists, else False.
    """
    needle = (title_keyword or "").lower()
    if not needle.strip():
        return False
    for issue in repo.get_issues(state="closed"):
        if needle in (issue.title or "").lower():
            print(f"Similar closed issue found: #{issue.number} — skipping.")
            return True
    return False


def build_permalink(filename: str, line: int = 1) -> str:
    """Build a GitHub blob permalink for a file and line number.

    Args:
        filename: Repository-relative path of the file.
        line: Line number to anchor; values that cannot be read as a
            positive integer fall back to 1.

    Returns:
        A ``https://github.com/<repo>/blob/<sha>/<path>#L<line>`` URL. The
        SHA comes from ``COMMIT_SHA`` or, when that is unset, from the head
        of the pull request held in ``pr_ref``.
    """
    from urllib.parse import quote

    sha = os.environ.get("COMMIT_SHA") or ""
    if not sha and pr_ref:
        sha = pr_ref.head.sha
    try:
        line_no = max(int(line), 1)
    except (TypeError, ValueError):
        line_no = 1
    # Percent-encode the path (slashes kept) so names containing spaces
    # still produce a clickable link.
    return f"https://github.com/{repo_name}/blob/{sha}/{quote(filename)}#L{line_no}"


# ---------------------------------------------------------------------------
# Auto-detection of change type from file paths and diff content.
# Used when no explicit labels are provided in commit message or PR.
# ---------------------------------------------------------------------------
def _any_path_contains(paths: list, needles: tuple) -> bool:
    """Return True when any path contains at least one of the needles."""
    return any(needle in path for path in paths for needle in needles)


def detect_change_type(files: list, diff: str, context: str) -> str:
    """Determine the analysis role based on changed file paths and diff content.

    Checks run in a fixed priority order and the first match wins:
    security, deps, ci, docs, frontend, backend, config, then general.
    All matching is a case-insensitive substring test.

    Args:
        files: List of changed file paths.
        diff: The accumulated diff text.
        context: Commit message or PR title/body. Accepted for interface
            compatibility; not consulted by the current rules.

    Returns:
        A string role key: "security", "deps", "ci", "docs", "frontend",
        "backend", "config", or "general".
    """
    files_lower = [f.lower() for f in files]
    diff_lower = (diff or "").lower()

    security_keywords = (
        "secret", "token", "password", "api_key", "apikey", "auth",
        "credential", "private_key", "access_key", "bearer",
        "vulnerability", "cve-", "injection", "xss", "csrf",
    )
    security_paths = ("security", "auth", ".env", "secret")
    if any(kw in diff_lower for kw in security_keywords) or _any_path_contains(
        files_lower, security_paths
    ):
        return "security"

    dependency_paths = (
        "requirements", "package.json", "package-lock", "pipfile",
        "poetry.lock", "cargo.toml", "go.sum", "gemfile",
        "dependabot", "renovate",
    )
    if _any_path_contains(files_lower, dependency_paths):
        return "deps"

    ci_paths = (
        ".github/workflows/", "jenkinsfile", ".gitlab-ci",
        ".circleci", "dockerfile", "docker-compose",
        ".github/actions/",
    )
    if _any_path_contains(files_lower, ci_paths):
        return "ci"

    # "docs" applies only when every changed file looks like documentation.
    doc_markers = (
        "readme", "contributing", "changelog", "license",
        "docs/", "doc/", ".md", ".rst", ".txt",
    )
    if files_lower and all(
        any(marker in path for marker in doc_markers) for path in files_lower
    ):
        return "docs"

    frontend_paths = (
        ".html", ".css", ".jsx", ".tsx", ".vue", ".svelte",
        "frontend/", "public/", "static/", "assets/",
    )
    frontend_keywords = (
        "classname", "style=", "onclick", "addeventlistener",
        "document.get", "innerhtml", "appendchild",
    )
    if _any_path_contains(files_lower, frontend_paths) or any(
        kw in diff_lower for kw in frontend_keywords
    ):
        return "frontend"

    backend_paths = (
        ".py", ".go", ".java", ".rb", ".rs", ".php",
        "api/", "server/", "backend/", "lib/", "src/",
    )
    if _any_path_contains(files_lower, backend_paths):
        return "backend"

    config_paths = (
        ".yml", ".yaml", ".toml", ".ini", ".cfg", ".conf",
        ".json", ".env",
    )
    if _any_path_contains(files_lower, config_paths):
        return "config"

    return "general"


# ---------------------------------------------------------------------------
# Severity guidance shared across all prompt roles.
# ---------------------------------------------------------------------------
severity_guide = """
Use the following severity scale. You have 7 levels — pick the one that best matches the ACTUAL impact:

CRITICAL — Production-breaking changes, security vulnerabilities (exposed secrets, SQL injection, XSS, CSRF, broken auth), data loss risks, or changes that could cause service outages.

HIGH — Significant logic changes affecting core functionality, new or modified API endpoints, permission and access control modifications, database schema changes, removal of important functionality, breaking changes to public interfaces.

ELEVATED — New substantial features or modules, large-scale refactors that change behavior across multiple files, integration with external services or APIs, changes to error handling or retry logic, modifications to data processing pipelines.

MEDIUM — New utility functions or helpers, workflow and CI/CD pipeline changes, configuration changes that affect runtime behavior, dependency version updates, adding new files with meaningful functionality, structural reorganization of existing code.

MODERATE — Minor feature additions, small behavioral changes, adding validation or input checks, improving logging or error messages, updating environment variables or build settings, adding new labels or issue templates.

LOW — Documentation updates (README, CONTRIBUTING, comments), cosmetic UI changes without behavior impact, code formatting or style fixes, renaming without behavior change, adding badges or metadata, updating .gitignore or editor configs.

INFORMATIONAL — Trivial changes: fixing typos, whitespace adjustments, comment rewording, version bumps in non-critical files, adding blank lines.

IMPORTANT: Do NOT default to LOW or INFORMATIONAL. Carefully evaluate the actual scope and impact. Most code changes that add or modify functionality should be MEDIUM or higher. Use the full range of the scale.
"""

# ---------------------------------------------------------------------------
# Structured output contract sent to the model.
# ---------------------------------------------------------------------------
base_instructions = """
Return only a raw JSON object with no markdown formatting. The JSON must have these exact keys:

"issue_title": string — include severity prefix like [CRITICAL], [HIGH], [ELEVATED], [MEDIUM], [MODERATE], [LOW], or [INFO] at the start,
"severity": string — one of: critical, high, elevated, medium, moderate, low, informational,
"issue_body": string — must include these sections:
  ## Problem
  (clear description with exact file paths and line numbers if known)

  ## Code Reference
  (the exact problematic code snippet or the key changed code)

  ## Suggested Fix
  (concrete code or steps to fix — or "No action required" for informational changes)

  ## Permalink
  (placeholder: PUT_PERMALINK_HERE — will be replaced automatically)

"labels": list of strings — standard GitHub labels plus the severity level,
"affected_file": string — the most relevant filename from the diff (or "" if unknown),
"affected_line": integer — approximate line number of the issue (or 1 if unknown),
"summary": string — 2-3 sentence plain-English summary for the PR comment

The issue_title, issue_body and summary MUST be written entirely in English.
"""

# ---------------------------------------------------------------------------
# Prompt role definitions — keyed by label match or auto-detected type.
# ---------------------------------------------------------------------------
PROMPT_ROLES = {
    "security": (
        "Act as a Strict Security Auditor. Perform a deep security audit "
        "(OWASP Top 10, CWE patterns). Find real vulnerabilities with exact "
        "file/line references. Check for exposed secrets, injection vectors, "
        "broken auth, insecure deserialization, and misconfigurations."
    ),
    "review": (
        "Act as a Strict Code Reviewer. Analyze code quality using SOLID, DRY, "
        "and KISS principles. Point to exact lines that violate these principles. "
        "Evaluate naming, error handling, and separation of concerns."
    ),
    "qa": (
        "Act as a QA Engineer. Identify edge cases, missing test coverage, "
        "untested error paths, and potential regressions. Reference exact "
        "functions and lines that need tests."
    ),
    "perf": (
        "Act as a Performance Expert. Analyze algorithmic complexity, identify "
        "O(n²) patterns, unnecessary allocations, N+1 queries, blocking I/O, "
        "and missed caching opportunities. Reference exact lines."
    ),
    "pm": (
        "Act as a Product Manager. Generate user-facing Release Notes with "
        "clear impact descriptions. Focus on what changed for the end user, "
        "not implementation details."
    ),
    "deps": (
        "Act as a Security & Dependency Auditor. Analyze all new or changed "
        "dependencies: check for known vulnerabilities (CVEs), license "
        "compatibility (MIT/Apache/GPL), package size impact, maintenance "
        "status, and whether each dep is actively maintained. Reference exact "
        "file and line where each dependency is added or changed."
    ),
    "arch": (
        "Act as a Software Architect. Review changes for architectural issues: "
        "violation of separation of concerns, tight coupling, wrong layer "
        "dependencies, anti-patterns (God object, spaghetti logic, magic "
        "numbers, circular dependencies). Reference exact files and lines."
    ),
    "ci": (
        "Act as a DevOps/CI Engineer. Review the CI/CD pipeline changes for "
        "correctness, security (token permissions, secret exposure), efficiency "
        "(caching, parallelism, job dependencies), and best practices. Check "
        "for overly broad permissions, missing timeout limits, and potential "
        "race conditions in workflows."
    ),
    "docs": (
        "Act as a Technical Writer. Review the documentation changes for "
        "completeness, accuracy, clarity, and consistency. Check that code "
        "examples are correct, links are valid, and the structure is logical. "
        "Note any missing sections or outdated information."
    ),
    "frontend": (
        "Act as a Frontend Engineer. Review the UI/UX changes for accessibility "
        "(a11y), responsive design, performance (bundle size, render blocking), "
        "XSS risks in DOM manipulation, proper event handling, and browser "
        "compatibility. Reference exact files and lines."
    ),
    "backend": (
        "Act as a Senior Backend Engineer. Review the changes for correctness, "
        "error handling, input validation, resource management (file handles, "
        "connections), concurrency safety, and API contract compliance. "
        "Reference exact files and lines."
    ),
    "config": (
        "Act as a Configuration & Infrastructure Reviewer. Analyze the config "
        "changes for correctness, security implications (exposed ports, "
        "permissive CORS, debug mode in production), consistency across "
        "environments, and potential breaking changes for existing deployments."
    ),
    "general": (
        "Act as a Senior Software Engineer reviewing a colleague's changes. "
        "Provide a thorough assessment covering: what was changed and why, "
        "whether the changes introduce any risks or bugs, code quality, "
        "and the overall scope and impact on the project."
    ),
}

# Maps commit-message bracket labels and PR labels to role keys.
LABEL_TO_ROLE = {
    "sec": "security", "security": "security", "audit": "security",
    "review": "review", "refactor": "review", "code-review": "review",
    "qa": "qa", "test": "qa", "testing": "qa",
    "perf": "perf", "performance": "perf", "optimize": "perf",
    "pm": "pm", "release": "pm", "product": "pm",
    "deps": "deps", "dependencies": "deps", "dep": "deps",
    "arch": "arch", "architecture": "arch",
    "ci": "ci", "devops": "ci", "pipeline": "ci", "workflow": "ci",
    "docs": "docs", "doc": "docs", "documentation": "docs",
    "frontend": "frontend", "ui": "frontend", "ux": "frontend", "css": "frontend",
    "backend": "backend", "api": "backend", "server": "backend",
    "config": "config", "infra": "config", "infrastructure": "config",
}

# ---------------------------------------------------------------------------
# Prompt routing — label match first, then auto-detect from diff content.
# ---------------------------------------------------------------------------
# The first trigger label that maps to a role wins.
role_key = next(
    (LABEL_TO_ROLE[label] for label in trigger_labels if label in LABEL_TO_ROLE),
    None,
)

if not role_key:
    role_key = detect_change_type(changed_files, diff_text, event_context)
    print(f"Auto-detected change type: {role_key}")

role_instruction = PROMPT_ROLES.get(role_key, PROMPT_ROLES["general"])

prompt = f"""{role_instruction}

Do NOT invent problems that do not exist in the diff. Base your analysis strictly on what you see.
{severity_guide}
Context: {event_context}
Changed files: {', '.join(changed_files)}
Changes: {diff_text}
{base_instructions}"""


def call_model(prompt: str, retries: int = 3, delay: int = 5) -> dict:
    """Send a review prompt to the hosted model and parse the JSON reply.

    Args:
        prompt: Full user prompt to send.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        The model reply parsed into a dict.

    Raises:
        SystemExit: With status 0 when every attempt failed, so the calling
            workflow step is not marked as failed.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {model_token}"
    }
    payload = {
        "messages": [
            {"role": "system", "content": "You are a professional software auditor. Always return valid JSON only. No markdown, no explanation, just the JSON object."},
            {"role": "user", "content": prompt}
        ],
        "model": MODEL_NAME,
        "temperature": 0.1
    }

    for attempt in range(retries):
        try:
            resp = requests.post(ENDPOINT, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()
            data = resp.json()
            raw = data['choices'][0]['message']['content'].strip()
            # Models sometimes wrap the object in a Markdown code fence
            # despite the instructions; strip it before parsing.
            raw = re.sub(r'^```json\s*|```$', '', raw, flags=re.MULTILINE).strip()
            parsed = json.loads(raw)
            # Callers use ``.get`` on the result, so a JSON list or scalar
            # counts as a failed attempt rather than a later crash.
            if not isinstance(parsed, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(parsed).__name__}"
                )
            return parsed
        except Exception as e:
            # Deliberate catch-all: any network, HTTP or parsing failure is
            # logged and retried; the script never fails the workflow here.
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < retries - 1:
                time.sleep(delay)

    print("All attempts failed. Exiting gracefully.")
    raise SystemExit(0)


# ---------------------------------------------------------------------------
# Main execution — call the model and post the results to GitHub.
# ---------------------------------------------------------------------------

result = call_model(prompt)

# The model output is untrusted: validate the required fields before use so
# a malformed reply ends the run quietly instead of raising KeyError.
issue_title = str(result.get("issue_title") or "").strip()
raw_issue_body = str(result.get("issue_body") or "")
if not issue_title or not raw_issue_body.strip():
    print("Model response is missing 'issue_title' or 'issue_body'. Exiting gracefully.")
    raise SystemExit(0)

title_keyword = issue_title[:40]
if was_already_closed(title_keyword):
    raise SystemExit(0)

affected_file = str(result.get("affected_file") or "")
try:
    affected_line = int(result.get("affected_line") or 1)
except (TypeError, ValueError):
    affected_line = 1

if affected_file:
    permalink = build_permalink(affected_file, affected_line)
else:
    permalink = "_No specific file identified_"
issue_body = raw_issue_body.replace("PUT_PERMALINK_HERE", permalink)

footer = f"\n\n---\n*Generated from {dedup_key} | Auto-detected role: `{role_key}`*"

severity_label_map = {
    "critical": "severity: critical",
    "high": "severity: high",
    "elevated": "severity: elevated",
    "medium": "severity: medium",
    "moderate": "severity: moderate",
    "low": "severity: low",
    "informational": "severity: informational",
}
severity = str(result.get("severity") or "medium").strip().lower()
if severity not in severity_label_map:
    # Unknown values fall back to medium so the label and the PR comment
    # always report the same severity.
    severity = "medium"

model_labels = result.get("labels")
if not isinstance(model_labels, list):
    model_labels = []
label_names = {
    label.strip() for label in model_labels
    if isinstance(label, str) and label.strip()
}
all_labels = sorted(label_names | {severity_label_map[severity]})

issue = repo.create_issue(
    title=issue_title,
    body=issue_body + footer,
    labels=all_labels
)
print(f"Created issue #{issue.number}: {issue.title}")

if pr_ref:
    summary = result.get("summary", "")
    if summary:
        pr_comment = (
            f"### AI Analysis Summary\n\n"
            f"{summary}\n\n"
            f"**Severity:** `{severity.upper()}` | **Role:** `{role_key}`\n\n"
            f"Full details: #{issue.number}"
        )
        pr_ref.create_issue_comment(pr_comment)
        print(f"Posted summary comment to PR #{pr_ref.number}")