Skip to content

Commit 0e01561

Browse files
committed
Add PackageProvenanceContent model
1 parent 001147e commit 0e01561

File tree

8 files changed

+381
-1
lines changed

8 files changed

+381
-1
lines changed

CHANGES/+attestations.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the ability to upload PEP 740 Provenance files to repositories.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Generated by Django 4.2.26 on 2025-11-10 09:11
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import pulpcore.app.util
6+
7+
8+
class Migration(migrations.Migration):
# Creates the PackageProvenance model as a child of core.Content
# (see `bases` and the parent-link `content_ptr` field below).
9+
10+
# Must run after the listed core and python migrations.
dependencies = [
11+
("core", "0145_domainize_import_export"),
12+
("python", "0016_pythonpackagecontent_metadata_sha256"),
13+
]
14+
15+
operations = [
16+
migrations.CreateModel(
17+
name="PackageProvenance",
18+
fields=[
19+
# Parent link to core.Content; doubles as the primary key.
(
20+
"content_ptr",
21+
models.OneToOneField(
22+
auto_created=True,
23+
on_delete=django.db.models.deletion.CASCADE,
24+
parent_link=True,
25+
primary_key=True,
26+
serialize=False,
27+
to="core.content",
28+
),
29+
),
30+
# The provenance document itself, stored as JSON.
("provenance", models.JSONField()),
31+
# 64 characters: the length of a hex-encoded sha256 digest.
("sha256", models.CharField(max_length=64)),
32+
# Owning domain; PROTECT blocks deleting a domain that is still referenced here.
(
33+
"_pulp_domain",
34+
models.ForeignKey(
35+
default=pulpcore.app.util.get_domain_pk,
36+
on_delete=django.db.models.deletion.PROTECT,
37+
to="core.domain",
38+
),
39+
),
40+
# The package this provenance belongs to; deleting the package deletes the provenance.
(
41+
"package",
42+
models.ForeignKey(
43+
on_delete=django.db.models.deletion.CASCADE,
44+
related_name="provenances",
45+
to="python.pythonpackagecontent",
46+
),
47+
),
48+
],
49+
options={
50+
"default_related_name": "%(app_label)s_%(model_name)s",
51+
# A given sha256 may appear only once per domain.
"unique_together": {("sha256", "_pulp_domain")},
52+
},
53+
bases=("core.content",),
54+
),
55+
]

pulp_python/app/models.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
import hashlib
2+
import json
13
from logging import getLogger
24

35
from aiohttp.web import json_response
46
from django.contrib.postgres.fields import ArrayField
57
from django.core.exceptions import ObjectDoesNotExist
68
from django.db import models
79
from django.conf import settings
10+
from django_lifecycle import (
11+
BEFORE_SAVE,
12+
hook,
13+
)
814
from pulpcore.plugin.models import (
915
AutoAddObjPermsMixin,
1016
Content,
@@ -15,6 +21,7 @@
1521
)
1622
from pulpcore.plugin.responses import ArtifactResponse
1723

24+
from pypi_attestations import Provenance
1825
from pathlib import PurePath
1926
from .utils import (
2027
artifact_to_python_content_data,
@@ -234,6 +241,43 @@ class Meta:
234241
]
235242

236243

244+
class PackageProvenance(Content):
    """
    Content unit holding a PEP 740 provenance object for a Python package.

    The provenance document is stored as JSON. Its ``sha256`` is derived from that
    document before every save and is unique within a domain.
    """

    TYPE = "provenance"
    # NOTE(review): presumably limits a repository version to one provenance per
    # package -- confirm against pulpcore's repo_key_fields semantics.
    repo_key_fields = ("package_id",)

    # Deleting the package also deletes its provenances.
    package = models.ForeignKey(
        PythonPackageContent,
        related_name="provenances",
        on_delete=models.CASCADE,
    )
    provenance = models.JSONField(null=False)
    # 64 characters: the length of a hex-encoded sha256 digest.
    sha256 = models.CharField(max_length=64, null=False)

    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @staticmethod
    def calculate_sha256(provenance):
        """Return the hex sha256 digest of ``provenance`` dumped as key-sorted JSON."""
        # sort_keys makes the digest independent of dict key order.
        canonical = json.dumps(provenance, sort_keys=True)
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    @hook(BEFORE_SAVE)
    def set_sha256_hook(self):
        """Recompute ``sha256`` from the current provenance right before saving."""
        digest = self.calculate_sha256(self.provenance)
        self.sha256 = digest

    @property
    def as_model(self):
        """Validate the stored JSON and return it as a ``Provenance`` object."""
        document = self.provenance
        return Provenance.model_validate(document)

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        unique_together = ("sha256", "_pulp_domain")
279+
280+
237281
class PythonPublication(Publication, AutoAddObjPermsMixin):
238282
"""
239283
A Publication for PythonContent.
@@ -294,7 +338,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
294338
"""
295339

296340
TYPE = "python"
297-
CONTENT_TYPES = [PythonPackageContent]
341+
CONTENT_TYPES = [PythonPackageContent, PackageProvenance]
298342
REMOTE_TYPES = [PythonRemote]
299343
PULL_THROUGH_SUPPORTED = True
300344

pulp_python/app/serializers.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import logging
22
import os
3+
import json
34
from gettext import gettext as _
5+
from cryptography import x509
46
from django.conf import settings
57
from django.db.utils import IntegrityError
68
from packaging.requirements import Requirement
79
from rest_framework import serializers
10+
from pydantic import ValidationError
11+
from pypi_attestations import Distribution, Provenance, VerificationError
812

913
from pulpcore.plugin import models as core_models
1014
from pulpcore.plugin import serializers as core_serializers
@@ -454,6 +458,138 @@ class Meta:
454458
model = python_models.PythonPackageContent
455459

456460

461+
class PackageProvenanceSerializer(core_serializers.NoArtifactContentUploadSerializer):
    """
    A Serializer for PackageProvenance.
    """

    package = core_serializers.DetailRelatedField(
        help_text=_("The package that the provenance is for."),
        view_name_pattern=r"content(-.*/.*)-detail",
        queryset=python_models.PythonPackageContent.objects.all(),
    )
    # Filled in from the uploaded file during deferred validation; never taken from the request.
    provenance = serializers.JSONField(read_only=True, default=dict)
    sha256 = serializers.CharField(read_only=True)
    verify = serializers.BooleanField(
        default=True,
        write_only=True,
        help_text=_("Verify each attestation in the provenance."),
    )

    def deferred_validate(self, data):
        """
        Validate that the provenance is valid and pointing to the correct package.

        Parses the uploaded file as a PEP 740 provenance and stores its JSON form
        under ``data["provenance"]``. Unless ``verify`` is false, every attestation
        in every bundle is verified against the target package's filename and
        sha256 digest.

        Args:
            data (dict): The validated data; must contain ``file`` and ``package``.

        Returns:
            dict: ``data`` with ``provenance`` added and ``verify`` removed.

        Raises:
            serializers.ValidationError: If the file is not a valid provenance or
                an attestation fails verification.
        """
        data = super().deferred_validate(data)
        try:
            provenance = Provenance.model_validate_json(data["file"].read())
        except ValidationError as e:
            # Translate the template first, then interpolate. Formatting inside
            # _() would look up the already-formatted string and never match.
            raise serializers.ValidationError(
                _("The uploaded provenance is not valid: {}").format(e)
            ) from e
        data["provenance"] = provenance.model_dump(mode="json")

        # `verify` only steers validation, so it is removed from the data here.
        # The fallback mirrors the field's declared default.
        if data.pop("verify", True):
            dist = Distribution(name=data["package"].filename, digest=data["package"].sha256)
            try:
                for attestation_bundle in provenance.attestation_bundles:
                    publisher = attestation_bundle.publisher
                    # NOTE(review): `_as_policy` is a private pypi_attestations API;
                    # confirm it is stable across the supported versions.
                    policy = publisher._as_policy()
                    for attestation in attestation_bundle.attestations:
                        attestation.verify(policy, dist)
            except VerificationError as e:
                raise serializers.ValidationError(
                    _("Provenance verification failed: {}").format(e)
                ) from e
        return data

    def retrieve(self, validated_data):
        """
        Look up an existing provenance with the same digest in the current domain.

        Args:
            validated_data (dict): Data containing the parsed ``provenance``.

        Returns:
            The matching PackageProvenance, or None when there is none.
        """
        sha256 = python_models.PackageProvenance.calculate_sha256(validated_data["provenance"])
        content = python_models.PackageProvenance.objects.filter(
            sha256=sha256, _pulp_domain=get_domain()
        ).first()
        return content

    class Meta:
        fields = core_serializers.NoArtifactContentUploadSerializer.Meta.fields + (
            "package",
            "provenance",
            "sha256",
            "verify",
        )
        model = python_models.PackageProvenance
518+
519+
520+
class _AttestationSerializer(serializers.Serializer):
    """
    A simple serializer for Attestation.

    Returns the information that `pypi-attestations inspect` provides.
    """

    version = serializers.CharField(read_only=True)
    statement = serializers.JSONField(read_only=True)
    certificate = serializers.JSONField(read_only=True)
    transparency_log = serializers.JSONField(read_only=True)

    def to_representation(self, instance):
        """
        Summarize one attestation as a plain dict.

        The result holds the attestation version, the JSON-decoded statement, a
        short description of the signing certificate and the transparency log
        indexes.
        """
        decoded_statement = json.loads(instance.envelope.statement.decode())
        material = instance.verification_material

        # The certificate is loaded as DER; its subjects come from the SAN extension.
        signing_cert = x509.load_der_x509_certificate(material.certificate)
        alt_names = signing_cert.extensions.get_extension_for_class(x509.SubjectAlternativeName)
        subjects = [general_name.value for general_name in alt_names.value]
        issuer = signing_cert.issuer.rfc4514_string()
        # "Validity" reports only the not-valid-after timestamp.
        expires = str(signing_cert.not_valid_after_utc)

        log_indexes = [entry["logIndex"] for entry in material.transparency_entries]

        return {
            "version": instance.version,
            "statement": decoded_statement,
            "certificate": {
                "Subjects": subjects,
                "Issuer": issuer,
                "Validity": expires,
            },
            "transparency_log": {"Log Indexes": log_indexes},
        }
553+
554+
555+
class _AttestationBundleSerializer(serializers.Serializer):
    """
    A simple serializer for AttestationBundle.
    """

    publisher = serializers.JSONField(read_only=True)
    attestations = _AttestationSerializer(many=True)

    def to_representation(self, instance):
        """Return the bundle's dumped publisher and each attestation rendered by the child serializer."""
        list_field = self.fields["attestations"]
        publisher = instance.publisher.model_dump()
        # Each attestation is rendered by the list field's child serializer.
        rendered = [list_field.child.to_representation(item) for item in instance.attestations]
        return {"publisher": publisher, "attestations": rendered}
571+
572+
573+
class MinimalPackageProvenanceSerializer(serializers.Serializer):
    """
    A human readable serializer for PackageProvenance.
    """

    version = serializers.CharField(read_only=True)
    attestation_bundles = _AttestationBundleSerializer(many=True)

    def to_representation(self, instance):
        """Render the instance's validated provenance as its version plus summarized bundles."""
        parsed = instance.as_model
        list_field = self.fields["attestation_bundles"]
        version = parsed.version
        # Each bundle is rendered by the list field's child serializer.
        bundles = [list_field.child.to_representation(bundle) for bundle in parsed.attestation_bundles]
        return {"version": version, "attestation_bundles": bundles}
591+
592+
457593
class MultipleChoiceArrayField(serializers.MultipleChoiceField):
458594
"""
459595
A wrapper to make sure this DRF serializer works properly with ArrayFields.

pulp_python/app/viewsets.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,40 @@ def upload(self, request):
400400
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
401401

402402

403+
class PackageProvenanceViewSet(core_viewsets.NoArtifactContentUploadViewSet):
404+
"""
405+
PackageProvenance represents a PEP 740 provenance object for a Python package.
406+
407+
Use ?minimal=true to get a human readable representation of the provenance.
408+
"""
409+
410+
endpoint_name = "provenance"
411+
queryset = python_models.PackageProvenance.objects.all()
412+
serializer_class = python_serializers.PackageProvenanceSerializer
413+
# NOTE(review): presumably the serializer used for ?minimal=true requests -- confirm in the base viewset.
minimal_serializer_class = python_serializers.MinimalPackageProvenanceSerializer
414+
415+
# Any authenticated user may list and retrieve. Creating additionally
# requires every condition listed under "create".
DEFAULT_ACCESS_POLICY = {
416+
"statements": [
417+
{
418+
"action": ["list", "retrieve"],
419+
"principal": "authenticated",
420+
"effect": "allow",
421+
},
422+
{
423+
"action": ["create"],
424+
"principal": "authenticated",
425+
"effect": "allow",
426+
"condition": [
427+
"has_required_repo_perms_on_upload:python.modify_pythonrepository",
428+
"has_required_repo_perms_on_upload:python.view_pythonrepository",
429+
"has_upload_param_model_or_domain_or_obj_perms:core.change_upload",
430+
],
431+
},
432+
],
433+
# Names the function used to scope the queryset.
"queryset_scoping": {"function": "scope_queryset"},
434+
}
435+
436+
403437
class PythonRemoteViewSet(core_viewsets.RemoteViewSet, core_viewsets.RolesMixin):
404438
"""
405439
<!-- User-facing documentation, rendered as html-->
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import pytest
2+
import requests
3+
4+
from pypi_simple import PyPISimple
5+
6+
from pulpcore.tests.functional.utils import PulpTaskError
7+
from pulp_python.tests.functional.constants import TWINE_READABLE_PROVENANCE
8+
9+
10+
@pytest.mark.parallel
def test_crd_provenance(python_bindings, python_content_factory, monitor_task, bindings_cfg):
    """
    Test creating and reading a provenance.

    Uploads the twine wheel from PyPI, attaches its upstream provenance and checks
    both the stored JSON and the ``?minimal=true`` representation.
    """
    filename = "twine-6.2.0-py3-none-any.whl"
    with PyPISimple() as client:
        page = client.get_project_page("twine")
    # Fail with a clear message if the file is gone from the index, rather than
    # hitting an unbound `content` further down.
    package = next((pkg for pkg in page.packages if pkg.filename == filename), None)
    assert package is not None, f"{filename} is not listed on the PyPI simple page for twine"
    content = python_content_factory(filename, url=package.url)

    provenance = python_bindings.ContentProvenanceApi.create(
        package=content.pulp_href,
        file_url=package.provenance_url,
    )
    task = monitor_task(provenance.task)
    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
    assert provenance.package == content.pulp_href

    # The stored provenance must equal the document served upstream.
    r = requests.get(package.provenance_url)
    assert r.status_code == 200
    assert r.json() == provenance.provenance

    url = f"{bindings_cfg.host}{provenance.pulp_href}?minimal=true"
    r = requests.get(url, auth=(bindings_cfg.username, bindings_cfg.password))
    assert r.status_code == 200
    assert r.json() == TWINE_READABLE_PROVENANCE
37+
38+
39+
@pytest.mark.parallel
def test_verify_provenance(python_bindings, python_content_factory, monitor_task):
    """
    Test verifying a provenance.

    Attaching twine's provenance to an unrelated package must fail verification,
    and must succeed when verification is switched off.
    """
    filename = "twine-6.2.0.tar.gz"
    with PyPISimple() as client:
        page = client.get_project_page("twine")
    # Without this check a missing file would silently leave `package` bound to
    # the last entry on the page, and the test would use the wrong provenance.
    package = next((pkg for pkg in page.packages if pkg.filename == filename), None)
    assert package is not None, f"{filename} is not listed on the PyPI simple page for twine"

    wrong_content = python_content_factory()  # shelf-reader-0.1.tar.gz
    provenance = python_bindings.ContentProvenanceApi.create(
        package=wrong_content.pulp_href,
        file_url=package.provenance_url,
    )
    with pytest.raises(PulpTaskError) as e:
        monitor_task(provenance.task)
    assert e.value.task.state == "failed"
    assert "twine-6.2.0.tar.gz != shelf-reader-0.1.tar.gz" in e.value.task.error["description"]

    # Test creating a provenance without verifying
    provenance = python_bindings.ContentProvenanceApi.create(
        package=wrong_content.pulp_href,
        file_url=package.provenance_url,
        verify=False,
    )
    task = monitor_task(provenance.task)
    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
    assert provenance.package == wrong_content.pulp_href

0 commit comments

Comments
 (0)