Skip to content

Commit 29c0f2f

Browse files
committed
Implement PEP 708
fixes: #998
1 parent 96b89db commit 29c0f2f

File tree

12 files changed

+547
-38
lines changed

12 files changed

+547
-38
lines changed

CHANGES/998.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implemented PEP 708 support and added a new ProjectMetadataContent model to track a package's project-level metadata at the repository level.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Generated by Django 4.2.26 on 2025-11-13 21:52
2+
3+
import django.contrib.postgres.fields
4+
from django.db import migrations, models
5+
import django.db.models.deletion
6+
import pulpcore.app.util
7+
8+
9+
class Migration(migrations.Migration):
    # Schema changes for PEP 708 support: adds the ``project_metadata`` flag to
    # ``PythonRemote`` and creates the ``ProjectMetadataContent`` content model.

    # Depends on the ``python`` app migration 0018_packageprovenance.
    dependencies = [
        ("python", "0018_packageprovenance"),
    ]

    operations = [
        # New boolean column on the remote; existing rows get False.
        migrations.AddField(
            model_name="pythonremote",
            name="project_metadata",
            field=models.BooleanField(default=False),
        ),
        migrations.CreateModel(
            name="ProjectMetadataContent",
            fields=[
                # Parent link to the base ``core.content`` row; doubles as the primary key.
                (
                    "content_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="core.content",
                    ),
                ),
                ("project_name", models.TextField()),
                # PEP 708 lists, stored as unbounded text arrays defaulting to empty.
                (
                    "tracks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                (
                    "alternate_locations",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                # 64 characters is the length of a hex-encoded SHA-256 digest.
                ("sha256", models.CharField(max_length=64)),
                # PROTECT: a domain cannot be deleted while this content references it.
                (
                    "_pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.PROTECT,
                        to="core.domain",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                # The digest must be unique within a domain.
                "unique_together": {("sha256", "_pulp_domain")},
            },
            bases=("core.content",),
        ),
    ]

pulp_python/app/models.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,69 @@ class Meta:
279279
unique_together = ("sha256", "_pulp_domain")
280280

281281

282+
class ProjectMetadataContent(Content):
    """
    A Content Type holding metadata that applies to a whole project rather than to one file.

    Currently used to implement PEP 708.

    Fields:
        project_name (models.TextField): The name of the project (normalized)
        tracks (models.ArrayField): Array of urls of projects in external repositories that
            this project tracks, i.e. whose namespace it extends (PEP 708)
        alternate_locations (models.ArrayField): Array of urls of external repositories where
            this project can also be found (PEP 708)

        sha256 (models.CharField): Digest of all the fields above
    """

    # TODO: Implement PEP 792

    TYPE = "project_metadata"
    repo_key_fields = ("project_name",)

    project_name = models.TextField()
    tracks = ArrayField(models.TextField(), default=list)
    alternate_locations = ArrayField(models.TextField(), default=list)

    sha256 = models.CharField(max_length=64, null=False)
    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @classmethod
    def from_simple_page(cls, page):
        """
        Builds an unsaved ProjectMetadataContent from a pypi_simple.ProjectPage.

        Only non-empty PEP 708 attributes are copied from the page; the rest keep the model
        defaults. The sha256 digest is computed before the instance is returned.
        """
        # NOTE(review): page.project is stored as-is; presumably already normalized - confirm.
        populated = {}
        for field_name in ("alternate_locations", "tracks"):
            value = getattr(page, field_name)
            if value:
                populated[field_name] = value

        instance = cls(project_name=page.project, **populated)
        instance.calculate_sha256()
        return instance

    def to_metadata(self):
        """Returns the PEP 708 fields as a dict; both keys are always included."""
        return dict(tracks=self.tracks, alternate_locations=self.alternate_locations)

    @hook(BEFORE_SAVE)
    def calculate_sha256(self):
        """Sets sha256 to the digest of the project name and both PEP 708 lists."""
        # Keys are sorted for a stable serialization; list order still affects the digest.
        payload = json.dumps(
            {
                "project_name": self.project_name,
                "tracks": self.tracks,
                "alternate_locations": self.alternate_locations,
            },
            sort_keys=True,
        )
        self.sha256 = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    class Meta:
        unique_together = ("sha256", "_pulp_domain")
        default_related_name = "%(app_label)s_%(model_name)s"
343+
344+
282345
class PythonPublication(Publication, AutoAddObjPermsMixin):
283346
"""
284347
A Publication for PythonContent.
@@ -314,6 +377,7 @@ class PythonRemote(Remote, AutoAddObjPermsMixin):
314377
exclude_platforms = ArrayField(
315378
models.CharField(max_length=10, blank=True), choices=PLATFORMS, default=list
316379
)
380+
project_metadata = models.BooleanField(default=False)
317381

318382
def get_remote_artifact_url(self, relative_path=None, request=None):
319383
"""Get url for remote_artifact"""
@@ -339,7 +403,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
339403
"""
340404

341405
TYPE = "python"
342-
CONTENT_TYPES = [PythonPackageContent, PackageProvenance]
406+
CONTENT_TYPES = [PythonPackageContent, PackageProvenance, ProjectMetadataContent]
343407
REMOTE_TYPES = [PythonRemote]
344408
PULL_THROUGH_SUPPORTED = True
345409

pulp_python/app/pypi/views.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import json
21
import logging
32

4-
from aiohttp.client_exceptions import ClientError
53
from rest_framework.viewsets import ViewSet
64
from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
75
from rest_framework.response import Response
@@ -27,13 +25,12 @@
2725
from packaging.utils import canonicalize_name
2826
from urllib.parse import urljoin, urlparse, urlunsplit
2927
from pathlib import PurePath
30-
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage
3128

3229
from pulpcore.plugin.viewsets import OperationPostponedResponse
3330
from pulpcore.plugin.tasking import dispatch
3431
from pulpcore.plugin.util import get_domain, get_url
35-
from pulpcore.plugin.exceptions import TimeoutException
3632
from pulp_python.app.models import (
33+
ProjectMetadataContent,
3734
PythonDistribution,
3835
PythonPackageContent,
3936
PythonPublication,
@@ -54,6 +51,7 @@
5451
PYPI_LAST_SERIAL,
5552
PYPI_SERIAL_CONSTANT,
5653
get_remote_package_filter,
54+
get_remote_simple_page,
5755
)
5856

5957
from pulp_python.app import tasks
@@ -127,6 +125,11 @@ def get_provenances(repository_version):
127125
"""Returns queryset of the provenance for this repository version."""
128126
return PackageProvenance.objects.filter(pk__in=repository_version.content)
129127

128+
@staticmethod
def get_projects_metadata(repository_version):
    """Returns a queryset of the ProjectMetadataContent held by this repository version."""
    version_content = repository_version.content
    return ProjectMetadataContent.objects.filter(pk__in=version_content)
132+
130133
def should_redirect(self, repo_version=None):
131134
"""Checks if there is a publication the content app can serve."""
132135
if self.distribution.publication:
@@ -143,6 +146,12 @@ def get_rvc(self):
143146
content = self.get_content(repo_ver)
144147
return repo_ver, content
145148

149+
def get_rvcm(self):
    """
    Returns the repository_version, content, and project metadata for the base_path.

    The project metadata is None when there is no repository version to query.
    """
    repo_ver, content = self.get_rvc()
    if not repo_ver:
        return repo_ver, content, None
    return repo_ver, content, self.get_projects_metadata(repo_ver)
154+
146155
def initial(self, request, *args, **kwargs):
147156
"""Perform common initialization tasks for PyPI endpoints."""
148157
super().initial(request, *args, **kwargs)
@@ -330,42 +339,37 @@ def parse_package(release_package):
330339

331340
rfilter = get_remote_package_filter(remote)
332341
if not rfilter.filter_project(package):
333-
return {}
342+
return {}, {}
334343

335-
url = remote.get_remote_artifact_url(f"simple/{package}/")
336-
remote.headers = remote.headers or []
337-
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
338-
downloader = remote.get_downloader(url=url, max_retries=1)
339-
try:
340-
d = downloader.fetch()
341-
except (ClientError, TimeoutException):
344+
page = get_remote_simple_page(package, remote)
345+
if not page:
342346
log.info(f"Failed to fetch {package} simple page from {remote.url}")
343-
return {}
347+
return {}, {}
344348

345-
if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
346-
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
347-
else:
348-
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
349-
return {
349+
releases = {
350350
p.filename: parse_package(p)
351351
for p in page.packages
352352
if rfilter.filter_release(package, p.version)
353353
}
354+
return releases, ProjectMetadataContent.from_simple_page(page).to_metadata()
354355

355356
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
356357
def retrieve(self, request, path, package):
357358
"""Retrieves the simple api html/json page for a package."""
358359
media_type = request.accepted_renderer.media_type
359360

360-
repo_ver, content = self.get_rvc()
361+
repo_ver, content, metadatas = self.get_rvcm()
361362
# Should I redirect if the normalized name is different?
362363
normalized = canonicalize_name(package)
363364
releases = {}
365+
project_metadata = {}
364366
if self.distribution.remote:
365-
releases = self.pull_through_package_simple(normalized, path, self.distribution.remote)
367+
releases, project_metadata = self.pull_through_package_simple(
368+
normalized, path, self.distribution.remote
369+
)
366370
elif self.should_redirect(repo_version=repo_ver):
367371
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
368-
if content:
372+
if content is not None:
369373
local_packages = content.filter(name__normalize=normalized)
370374
packages = local_packages.values(
371375
"filename",
@@ -393,17 +397,25 @@ def retrieve(self, request, path, package):
393397
for p in packages
394398
}
395399
releases.update(local_releases)
396-
if not releases:
400+
if metadatas is not None:
401+
local_project_metadata = (
402+
metadatas.filter(project_name=normalized)
403+
.values("tracks", "alternate_locations")
404+
.first()
405+
)
406+
if local_project_metadata:
407+
project_metadata.update(local_project_metadata)
408+
if not (releases or project_metadata):
397409
return HttpResponseNotFound(f"{normalized} does not exist.")
398410

399411
media_type = request.accepted_renderer.media_type
400412
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
401413

402414
if media_type == PYPI_SIMPLE_V1_JSON:
403-
detail_data = write_simple_detail_json(normalized, releases.values())
415+
detail_data = write_simple_detail_json(normalized, releases.values(), project_metadata)
404416
return Response(detail_data, headers=headers)
405417
else:
406-
detail_data = write_simple_detail(normalized, releases.values())
418+
detail_data = write_simple_detail(normalized, releases.values(), project_metadata)
407419
kwargs = {"content_type": media_type, "headers": headers}
408420
return HttpResponse(detail_data, **kwargs)
409421

pulp_python/app/serializers.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
artifact_to_python_content_data,
1919
get_project_metadata_from_file,
2020
parse_project_metadata,
21+
canonicalize_name,
2122
)
2223

2324

@@ -525,6 +526,52 @@ class Meta:
525526
model = python_models.PackageProvenance
526527

527528

529+
class ProjectMetadataContentSerializer(core_serializers.NoArtifactContentSerializer):
    """
    A Serializer for ProjectMetadataContent.
    """

    project_name = serializers.CharField(
        help_text=_("The name of the python project."),
        required=True,
    )
    # Both PEP 708 lists are optional and may be empty, but entries must be non-blank.
    tracks = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        allow_empty=True,
        required=False,
    )
    alternate_locations = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        allow_empty=True,
        required=False,
    )
    # Never accepted from the client; the model derives it from the other fields.
    sha256 = serializers.CharField(
        help_text=_("The SHA256 digest of the project metadata."),
        read_only=True,
    )

    def validate_project_name(self, value):
        """Returns the submitted project name in its canonical (normalized) form."""
        return canonicalize_name(value)

    def retrieve(self, validated_data):
        """
        Looks up an already-stored unit equivalent to the validated data.

        A throwaway model instance is built only to compute the sha256 digest; the lookup
        then matches on that digest within the current domain. Returns None if nothing matches.
        """
        candidate = python_models.ProjectMetadataContent(**validated_data)
        candidate.calculate_sha256()
        matches = python_models.ProjectMetadataContent.objects.filter(
            sha256=candidate.sha256, _pulp_domain=get_domain()
        )
        return matches.first()

    class Meta:
        model = python_models.ProjectMetadataContent
        fields = core_serializers.NoArtifactContentSerializer.Meta.fields + (
            "project_name",
            "tracks",
            "alternate_locations",
            "sha256",
        )
573+
574+
528575
class MultipleChoiceArrayField(serializers.MultipleChoiceField):
529576
"""
530577
A wrapper to make sure this DRF serializer works properly with ArrayFields.
@@ -595,6 +642,11 @@ class PythonRemoteSerializer(core_serializers.RemoteSerializer):
595642
choices=python_models.PLATFORMS,
596643
default=list,
597644
)
645+
project_metadata = serializers.BooleanField(
646+
required=False,
647+
help_text=_("Whether to sync project metadata."),
648+
default=False,
649+
)
598650

599651
def validate_includes(self, value):
600652
"""Validates the includes"""
@@ -626,6 +678,7 @@ class Meta:
626678
"package_types",
627679
"keep_latest_packages",
628680
"exclude_platforms",
681+
"project_metadata",
629682
)
630683
model = python_models.PythonRemote
631684

0 commit comments

Comments
 (0)