Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions premerge/bigquery_schema/llvm_commits_table_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@
"mode": "REPEATED",
"description": "List of GitHub users who reviewed the pull request for this commit"
},
{
"name": "is_revert",
"type": "BOOLEAN",
"mode": "NULLABLE",
"description": "Whether or not this commit is a revert"
},
{
"name": "pull_request_reverted",
"type": "INTEGER",
"mode": "NULLABLE",
"description": "Pull request matched in revert message. Not reliable for determining if a PR was reverted, `commit_reverted` may contain a commit belonging to a PR"
},
{
"name": "commit_reverted",
"type": "STRING",
"mode": "NULLABLE",
"description": "Commit sha matched in revert message. Not reliable for determining if a commit was reverted, `pull_request_reverted` may contain a PR contributing a commit"
},
{
"name": "diff",
"type": "RECORD",
Expand Down
65 changes: 46 additions & 19 deletions premerge/ops-container/process_llvm_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import math
import os
import re
import git
from google.cloud import bigquery
import requests
Expand Down Expand Up @@ -64,6 +65,9 @@ class LLVMCommitInfo:
is_reviewed: bool = False
is_approved: bool = False
reviewers: set[str] = dataclasses.field(default_factory=set)
is_revert: bool = False
pull_request_reverted: int | None = None
commit_reverted: str | None = None


def scrape_new_commits_by_date(
Expand Down Expand Up @@ -113,26 +117,49 @@ def query_for_reviews(
List of LLVMCommitInfo objects for each commit's review information.
"""
# Create a map of commit sha to info
new_commits = {
commit.hexsha: LLVMCommitInfo(
commit_sha=commit.hexsha,
commit_timestamp_seconds=commit.committed_date,
diff=[
{
"file": file,
"additions": line_stats["insertions"],
"deletions": line_stats["deletions"],
"total": line_stats["lines"],
}
for file, line_stats in commit.stats.files.items()
],
)
for commit in new_commits
}
new_commits_info = {}
for commit in new_commits:
# Check if this commit is a revert
is_revert = (
re.match(
r"^Revert \".*\"( \(#\d+\))?", commit.message, flags=re.IGNORECASE
)
is not None
)

# Check which pull request or commit is being reverted (if any)
pull_request_match = re.search(
r"Reverts? (?:llvm\/llvm-project)?#(\d+)", commit.message, flags=re.IGNORECASE
)
commit_match = re.search(
r"This reverts commit (\w+)", commit.message, flags=re.IGNORECASE
)
pull_request_reverted = (
int(pull_request_match.group(1)) if pull_request_match else None
)
commit_reverted = commit_match.group(1) if commit_match else None

# Add entry
new_commits_info[commit.hexsha] = LLVMCommitInfo(
commit_sha=commit.hexsha,
commit_timestamp_seconds=commit.committed_date,
diff=[
{
"file": file,
"additions": line_stats["insertions"],
"deletions": line_stats["deletions"],
"total": line_stats["lines"],
}
for file, line_stats in commit.stats.files.items()
],
is_revert=is_revert,
pull_request_reverted=pull_request_reverted,
commit_reverted=commit_reverted,
)

# Create GraphQL subqueries for each commit
commit_subqueries = []
for commit_sha in new_commits:
for commit_sha in new_commits_info:
commit_subqueries.append(
COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha)
)
Expand Down Expand Up @@ -180,7 +207,7 @@ def query_for_reviews(
# Amend commit information with GitHub data
for commit_sha, data in api_commit_data.items():
commit_sha = commit_sha.removeprefix("commit_")
commit_info = new_commits[commit_sha]
commit_info = new_commits_info[commit_sha]
commit_info.commit_author = data["author"]["user"]["login"]

# If commit has no pull requests, skip it. No data to update.
Expand All @@ -201,7 +228,7 @@ def query_for_reviews(
# against what we want to measure, so remove them from the set of reviewers.
commit_info.reviewers.discard(commit_info.commit_author)

return list(new_commits.values())
return list(new_commits_info.values())


def upload_daily_metrics_to_bigquery(
Expand Down