diff --git a/premerge/bigquery_schema/llvm_commits_table_schema.json b/premerge/bigquery_schema/llvm_commits_table_schema.json index 7d9333d3b..7ddd27e14 100644 --- a/premerge/bigquery_schema/llvm_commits_table_schema.json +++ b/premerge/bigquery_schema/llvm_commits_table_schema.json @@ -47,6 +47,24 @@ "mode": "REPEATED", "description": "List of GitHub users who reviewed the pull request for this commit" }, + { + "name": "is_revert", + "type": "BOOLEAN", + "mode": "NULLABLE", + "description": "Whether or not this commit is a revert" + }, + { + "name": "pull_request_reverted", + "type": "INTEGER", + "mode": "NULLABLE", + "description": "Pull request number matched in the revert message. Not sufficient on its own for determining whether a PR was reverted: `commit_reverted` may instead contain a commit belonging to the PR" + }, + { + "name": "commit_reverted", + "type": "STRING", + "mode": "NULLABLE", + "description": "Commit SHA matched in the revert message. Not sufficient on its own for determining whether a commit was reverted: `pull_request_reverted` may instead contain the PR that contributed the commit" + }, { "name": "diff", "type": "RECORD", diff --git a/premerge/ops-container/process_llvm_commits.py b/premerge/ops-container/process_llvm_commits.py index bc3d68e01..028d3b0b6 100644 --- a/premerge/ops-container/process_llvm_commits.py +++ b/premerge/ops-container/process_llvm_commits.py @@ -3,6 +3,7 @@ import logging import math import os +import re import git from google.cloud import bigquery import requests @@ -64,6 +65,9 @@ class LLVMCommitInfo: is_reviewed: bool = False is_approved: bool = False reviewers: set[str] = dataclasses.field(default_factory=set) + is_revert: bool = False + pull_request_reverted: int | None = None + commit_reverted: str | None = None def scrape_new_commits_by_date( @@ -113,26 +117,49 @@ def query_for_reviews( List of LLVMCommitInfo objects for each commit's review information. 
""" # Create a map of commit sha to info - new_commits = { - commit.hexsha: LLVMCommitInfo( - commit_sha=commit.hexsha, - commit_timestamp_seconds=commit.committed_date, - diff=[ - { - "file": file, - "additions": line_stats["insertions"], - "deletions": line_stats["deletions"], - "total": line_stats["lines"], - } - for file, line_stats in commit.stats.files.items() - ], - ) - for commit in new_commits - } + new_commits_info = {} + for commit in new_commits: + # Check if this commit is a revert + is_revert = ( + re.match( + r"^Revert \".*\"( \(#\d+\))?", commit.message, flags=re.IGNORECASE + ) + is not None + ) + + # Check which pull request or commit is being reverted (if any) + pull_request_match = re.search( + r"Reverts? (?:llvm\/llvm-project)?#(\d+)", commit.message, flags=re.IGNORECASE + ) + commit_match = re.search( + r"This reverts commit (\w+)", commit.message, flags=re.IGNORECASE + ) + pull_request_reverted = ( + int(pull_request_match.group(1)) if pull_request_match else None + ) + commit_reverted = commit_match.group(1) if commit_match else None + + # Add entry + new_commits_info[commit.hexsha] = LLVMCommitInfo( + commit_sha=commit.hexsha, + commit_timestamp_seconds=commit.committed_date, + diff=[ + { + "file": file, + "additions": line_stats["insertions"], + "deletions": line_stats["deletions"], + "total": line_stats["lines"], + } + for file, line_stats in commit.stats.files.items() + ], + is_revert=is_revert, + pull_request_reverted=pull_request_reverted, + commit_reverted=commit_reverted, + ) # Create GraphQL subqueries for each commit commit_subqueries = [] - for commit_sha in new_commits: + for commit_sha in new_commits_info: commit_subqueries.append( COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha) ) @@ -180,7 +207,7 @@ def query_for_reviews( # Amend commit information with GitHub data for commit_sha, data in api_commit_data.items(): commit_sha = commit_sha.removeprefix("commit_") - commit_info = new_commits[commit_sha] + commit_info 
= new_commits_info[commit_sha] commit_info.commit_author = data["author"]["user"]["login"] # If commit has no pull requests, skip it. No data to update. @@ -201,7 +228,7 @@ def query_for_reviews( # against what we want to measure, so remove them from the set of reviewers. commit_info.reviewers.discard(commit_info.commit_author) - return list(new_commits.values()) + return list(new_commits_info.values()) def upload_daily_metrics_to_bigquery(