Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cronjobs/src/commands/_git_export_git_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def clone_or_fetch(
if not repo.raw_listall_references():
print("No branches or tags found in the repository.")
else:
print("Head was at", repo.head.target)
if not repo.head_is_unborn:
print("Head was at", repo.head.target)
print(f"Fetching from {repo_url}...")
remote.fetch(callbacks=callbacks, prune=True)
else:
Expand Down
22 changes: 18 additions & 4 deletions cronjobs/src/commands/git_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ def git_export():
if not repo.raw_listall_references():
print("No branches or tags found in the repository.")
else:
print("Head is now at", repo.head.target)
if not repo.head_is_unborn:
print("Head is now at", repo.head.target)

try:
changed_attachments, changed_branches, created_tags = asyncio.run(
Expand Down Expand Up @@ -506,7 +507,9 @@ def process_attachments(
return changed_attachments, common_content


def changeset_to_branch_folder(changeset: dict[str, Any]) -> list[tuple[str, bytes]]:
def changeset_to_branch_folder(
branch_tree: pygit2.Tree | None, changeset: dict[str, Any]
) -> list[tuple[str, bytes]]:
"""
Convert a changeset to a list of files to be stored in the corresponding branch folder.
"""
Expand All @@ -516,6 +519,15 @@ def changeset_to_branch_folder(changeset: dict[str, Any]) -> list[tuple[str, byt
records = sorted(changeset["changes"], key=lambda r: r["id"])
for record in records:
branch_content.append((f"{cid}/{record['id']}.json", json_dumpb(record)))

# Delete any records that were removed in this changeset.
# (branch_tree is None on first run, and `cid` folder may not exist yet)
if branch_tree is not None and cid in branch_tree:
for entry in branch_tree[cid]:
basename = entry.name.rsplit(".json", 1)[0]
if basename != "metadata" and basename not in {r["id"] for r in records}:
branch_content.append((f"{cid}/{entry.name}", None))

return branch_content


Expand All @@ -536,7 +548,9 @@ def initialize_bucket_branches(
for bid, bucket_changesets in changesets_by_bucket.items():
branch_content: list[tuple[str, bytes]] = []
for changeset in bucket_changesets:
branch_content += changeset_to_branch_folder(changeset)
branch_content += changeset_to_branch_folder(
branch_tree=None, changeset=changeset
)

# Bucket branch does not exist yet, create it as an empty branch.
empty_tree_id = repo.TreeBuilder().write()
Expand Down Expand Up @@ -605,7 +619,7 @@ def update_bucket_branches(
dtcollection = ts2dt(timestamp).isoformat()
commit_message = f"{bid}/{cid}@{timestamp} ({dtcollection})"

branch_content = changeset_to_branch_folder(changeset)
branch_content = changeset_to_branch_folder(branch_tree, changeset)
files_tree_id = tree_upsert_blobs(
repo, branch_content, base_tree=branch_tree
)
Expand Down
73 changes: 69 additions & 4 deletions cronjobs/tests/commands/test_git_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,16 @@ def mock_rs_server_content():
)


def read_file(repo, branch, filepath):
ref = f"refs/heads/{branch}"
branch_ref = repo.lookup_reference(ref)
commit = repo[branch_ref.target]
def read_file(repo, ref_or_branch_name, filepath):
if not ref_or_branch_name.startswith("refs/"):
ref_name = f"refs/heads/{ref_or_branch_name}"
else:
ref_name = ref_or_branch_name
ref = repo.lookup_reference(ref_name)
commit = repo[ref.target]
# If it's a tag, peel to commit
if commit.type == pygit2.GIT_OBJECT_TAG:
commit = commit.peel(pygit2.GIT_OBJECT_COMMIT)
node = commit.tree
for part in filepath.split("/"):
entry = node[part]
Expand Down Expand Up @@ -588,6 +594,65 @@ def test_repo_sync_stores_collections_records_in_buckets_branches_with_tags(
assert '"attachment":{' in rid2.decode()


@responses.activate
def test_repo_sync_deletes_records_from_past_runs(
    repo,
    mock_git_fetch,
    mock_ls_remotes,
    mock_rs_server_content,
    mock_github_lfs,
    mock_git_push,
):
    """
    Check that a record missing from a later changeset cannot be read at the new tag.

    The export runs twice: once against the default mocked server content, then
    again after the mocked server reports an empty list of changes for
    ``bid2/cid2`` at a newer timestamp.
    """
    record_path = "cid2/rid2-1.json"
    tag_before = "refs/tags/v1/timestamps/bid2/cid2/1600000000000"
    tag_after = "refs/tags/v1/timestamps/bid2/cid2/1800000000000"
    monitor_changes_url = (
        "http://testserver:9999/v1/buckets/monitor/collections/changes/changeset"
    )
    cid2_changeset_url = (
        "http://testserver:9999/v1/buckets/bid2/collections/cid2/changeset"
    )

    # Payload announcing that bid2/cid2 changed at the newer timestamp.
    monitor_changes_payload = {
        "timestamp": 1800000000000,
        "changes": [
            {
                "last_modified": 1800000000000,
                "bucket": "bid2",
                "collection": "cid2",
            }
        ],
    }
    # Payload for the collection itself: metadata only, no records left.
    cid2_changeset_payload = {
        "timestamp": 1800000000000,
        "metadata": {
            "bucket": "bid2",
            "id": "cid2",
            "signature": {
                "x5u": "https://autograph.example.com/keys/123",
            },
            "last_modified": 1888888888000,
        },
        # The record is simply absent: `_since` is not used, so the server
        # does not send a tombstone for it.
        "changes": [],
    }

    # First run, using the content provided by the fixtures.
    git_export.git_export()
    simulate_pushed(repo, mock_ls_remotes)

    # The record is readable at the first tag (this call must not raise).
    read_file(repo, tag_before, record_path)

    # Swap in the responses describing the deletion, then export again.
    responses.replace(
        responses.GET,
        monitor_changes_url,
        json=monitor_changes_payload,
    )
    responses.add(
        responses.GET,
        cid2_changeset_url,
        json=cid2_changeset_payload,
    )

    git_export.git_export()

    # Looking the record up at the newer tag must now fail.
    with pytest.raises(KeyError):
        read_file(repo, tag_after, record_path)


@responses.activate
def test_repo_sync_stores_attachments_as_lfs_pointers(
repo,
Expand Down
Loading