Skip to content

Commit dc4f725

Browse files
committed
feat: initial checkout denormalization function
1 parent 5456908 commit dc4f725

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from datetime import datetime
2+
from django.db import connections
3+
from kernelCI_app.models import Checkouts, TreeListing
4+
5+
6+
def handle_checkout_denormalization(*, buffer: list[Checkouts]) -> None:
    """Keep the TreeListing denormalization table in sync with new checkouts.

    A tree is identified by the tuple ``(origin, tree_name,
    git_repository_branch, git_repository_url)``. For every tree present in
    ``buffer`` the newest checkout (by ``start_time``) is upserted into
    TreeListing, unless TreeListing already holds a row for that tree with a
    strictly newer ``start_time``.

    Only TreeListing is read and written here; this function does not touch
    any other table.

    Args:
        buffer: Checkouts about to be ingested. An empty buffer is a no-op.
    """
    if not buffer:
        return

    # Collapse the buffer to a single candidate per tree. Sending two rows
    # with the same conflict target in one INSERT ... ON CONFLICT DO UPDATE
    # statement is rejected by PostgreSQL, so duplicates must be resolved
    # before bulk_create. On equal start_time the later buffer entry wins,
    # matching the ">=" rule used against the stored row below.
    newest_by_tree = {}
    for checkout in buffer:
        key = _tree_key(checkout)
        start_time = _as_datetime(checkout.start_time)
        current = newest_by_tree.get(key)
        if current is None or _is_not_older(start_time, current[1]):
            newest_by_tree[key] = (checkout, start_time)

    tree_keys = list(newest_by_tree)
    placeholders = ",".join(["(%s, %s, %s, %s)"] * len(tree_keys))
    flattened_params = [value for key in tree_keys for value in key]

    # Fetch the start_time currently stored for each tree in the batch.
    # Only the number of placeholders is interpolated into the SQL text;
    # every value is passed as a bound parameter.
    # NOTE(review): "=" never matches NULL, so a tree whose branch/url is
    # NULL is always treated as missing from TreeListing - confirm whether
    # these columns can be NULL.
    query = f"""
        SELECT
            t.origin,
            t.tree_name,
            t.git_repository_branch,
            t.git_repository_url,
            t.start_time
        FROM
            tree_listing t
        JOIN
            (VALUES {placeholders})
            AS v(origin, tree_name, git_repository_branch, git_repository_url)
        ON (
            t.origin = v.origin
            AND t.tree_name = v.tree_name
            AND t.git_repository_branch = v.git_repository_branch
            AND t.git_repository_url = v.git_repository_url
        )
    """

    with connections["default"].cursor() as cursor:
        cursor.execute(query, flattened_params)
        results = cursor.fetchall()

    # The map is keyed by tree identity, not by checkout id: the stored row
    # carries the id of the *previous* checkout of that tree, which never
    # equals the id of an incoming one.
    existing_start_times = {tuple(row[:4]): row[4] for row in results}

    # Upsert when the tree is not listed yet, or when the incoming checkout
    # is at least as recent as the stored one. Older checkouts are ignored.
    checkouts_for_update: list[Checkouts] = [
        checkout
        for key, (checkout, start_time) in newest_by_tree.items()
        if key not in existing_start_times
        or _is_not_older(start_time, existing_start_times[key])
    ]

    if not checkouts_for_update:
        return

    tree_listing_objects = [
        TreeListing(
            field_timestamp=checkout.field_timestamp,
            checkout_id=checkout.id,
            origin=checkout.origin,
            tree_name=checkout.tree_name,
            git_repository_url=checkout.git_repository_url,
            git_repository_branch=checkout.git_repository_branch,
            git_commit_hash=checkout.git_commit_hash,
            git_commit_name=checkout.git_commit_name,
            git_commit_tags=checkout.git_commit_tags,
            start_time=checkout.start_time,
            origin_builds_finish_time=checkout.origin_builds_finish_time,
            origin_tests_finish_time=checkout.origin_tests_finish_time,
            # Countings are defaulted to 0 when not provided
        )
        for checkout in checkouts_for_update
    ]

    # NOTE(review): the counting columns are listed in update_fields, so on
    # conflict they are overwritten with the values of the freshly built
    # objects (which set none of them) - confirm that resetting the counts
    # is intended when the same checkout is re-submitted.
    TreeListing.objects.bulk_create(
        tree_listing_objects,
        update_conflicts=True,
        unique_fields=[
            "origin",
            "tree_name",
            "git_repository_branch",
            "git_repository_url",
        ],
        update_fields=[
            "field_timestamp",
            "checkout_id",
            "origin",
            "tree_name",
            "git_repository_url",
            "git_repository_branch",
            "git_commit_hash",
            "git_commit_name",
            "git_commit_tags",
            "start_time",
            "origin_builds_finish_time",
            "origin_tests_finish_time",
            "pass_builds",
            "fail_builds",
            "done_builds",
            "miss_builds",
            "skip_builds",
            "error_builds",
            "null_builds",
            "pass_boots",
            "fail_boots",
            "done_boots",
            "miss_boots",
            "skip_boots",
            "error_boots",
            "null_boots",
            "pass_tests",
            "fail_tests",
            "done_tests",
            "miss_tests",
            "skip_tests",
            "error_tests",
            "null_tests",
        ],
    )
    print(f"Updated {len(checkouts_for_update)} trees in TreeListing", flush=True)


def _tree_key(row) -> tuple:
    """Return the four attributes that identify a tree in TreeListing."""
    return (
        row.origin,
        row.tree_name,
        row.git_repository_branch,
        row.git_repository_url,
    )


def _as_datetime(value):
    """Return ``value`` as a datetime; ``None`` and datetimes pass through."""
    if value is None or isinstance(value, datetime):
        return value
    return datetime.fromisoformat(value)


def _is_not_older(candidate, reference) -> bool:
    """Tell whether ``candidate`` may replace a row dated ``reference``.

    An undated reference can always be replaced; an undated candidate never
    replaces a dated reference because it cannot be shown to be newer.
    """
    if reference is None:
        return True
    if candidate is None:
        return False
    return candidate >= reference

backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
import yaml
1515
import kcidb_io
1616
from django.db import transaction
17+
from kernelCI_app.management.commands.helpers.denormal import (
18+
handle_checkout_denormalization,
19+
)
1720
from kernelCI_app.models import Issues, Checkouts, Builds, Tests, Incidents
1821

1922
from kernelCI_app.management.commands.helpers.process_submissions import (
@@ -304,6 +307,9 @@ def consume_buffer(buffer: list[TableModels], item_type: TableNames) -> None:
304307
if not buffer:
305308
return
306309

310+
if item_type == "checkouts":
311+
handle_checkout_denormalization(buffer=buffer)
312+
307313
model = MODEL_MAP[item_type]
308314

309315
t0 = time.time()

0 commit comments

Comments
 (0)