Skip to content

Commit 4d4b55c

Browse files
committed
Fix #3515 by preserving images quality by default
1 parent 85b53d8 commit 4d4b55c

File tree

5 files changed

+38
-9
lines changed

5 files changed

+38
-9
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ mypy:
2626
mypy pypdf --ignore-missing-imports --check-untyped --strict
2727

2828
ruff:
29-
ruff check pypdf tests make_release.py
29+
ruff check --fix pypdf tests make_release.py

pypdf/_page.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -389,16 +389,18 @@ def replace(self, new_image: Image, **kwargs: Any) -> None:
389389
b = BytesIO()
390390
new_image.save(b, "PDF", **kwargs)
391391
reader = PdfReader(b)
392-
assert reader.pages[0].images[0].indirect_reference is not None
392+
page_image = reader.pages[0].images[0]
393+
assert page_image.indirect_reference is not None
393394
self.indirect_reference.pdf._objects[self.indirect_reference.idnum - 1] = (
394-
reader.pages[0].images[0].indirect_reference.get_object()
395+
page_image.indirect_reference.get_object()
395396
)
396397
cast(
397398
PdfObject, self.indirect_reference.get_object()
398399
).indirect_reference = self.indirect_reference
399400
# change the object attributes
400401
extension, byte_stream, img = _xobj_to_image(
401-
cast(DictionaryObject, self.indirect_reference.get_object())
402+
cast(DictionaryObject, self.indirect_reference.get_object()),
403+
pillow_parameters=kwargs,
402404
)
403405
assert extension is not None
404406
self.name = self.name[: self.name.rfind(".")] + extension

pypdf/filters.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -791,15 +791,20 @@ def decode_stream_data(stream: Any) -> bytes:
791791
return data
792792

793793

794-
def _xobj_to_image(x_object: dict[str, Any]) -> tuple[Optional[str], bytes, Any]:
794+
def _xobj_to_image(
795+
x_object: dict[str, Any],
796+
pillow_parameters: Union[dict[str, Any], None] = None
797+
) -> tuple[Optional[str], bytes, Any]:
795798
"""
796799
Users need to have the pillow package installed.
797800
798801
It's unclear if pypdf will keep this function here, hence it's private.
799802
It might get removed at any point.
800803
801804
Args:
802-
x_object:
805+
x_object:
806+
pillow_parameters: parameters provided to Pillow Image.save() method,
807+
cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>
803808
804809
Returns:
805810
Tuple[file extension, bytes, PIL.Image.Image]
@@ -846,6 +851,9 @@ def _apply_alpha(
846851
extension = ".png"
847852
return img, extension, image_format
848853

854+
if pillow_parameters is None:
855+
pillow_parameters = {}
856+
849857
# For error reporting
850858
obj_as_text = (
851859
x_object.indirect_reference.__repr__()
@@ -947,10 +955,14 @@ def _apply_alpha(
947955
img, x_object, obj_as_text, image_format, extension
948956
)
949957

958+
# Preserve JPEG image quality - cf. https://github.com/py-pdf/pypdf/issues/3515
959+
if image_format == "JPEG" and "quality" not in pillow_parameters:
960+
pillow_parameters["quality"] = "keep"
961+
950962
# Save image to bytes
951963
img_byte_arr = BytesIO()
952964
try:
953-
img.save(img_byte_arr, format=image_format)
965+
img.save(img_byte_arr, format=image_format, **pillow_parameters)
954966
except OSError: # pragma: no cover # covered with pillow 10.3
955967
# in case of error, we convert to RGBA and then to PNG
956968
img1 = img.convert("RGBA")

pypdf/generic/_data_structures.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,10 +1046,14 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
10461046
retval._data = FlateDecode.encode(self._data, level)
10471047
return retval
10481048

1049-
def decode_as_image(self) -> Any:
1049+
def decode_as_image(self, pillow_parameters: Union[dict[str, Any], None] = None) -> Any:
10501050
"""
10511051
Try to decode the stream object as an image
10521052
1053+
Args:
1054+
pillow_parameters: parameters provided to Pillow Image.save() method,
1055+
cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>
1056+
10531057
Returns:
10541058
a PIL image if proper decoding has been found
10551059
Raises:
@@ -1066,7 +1070,7 @@ def decode_as_image(self) -> Any:
10661070
except AttributeError:
10671071
msg = f"{self.__repr__()} object does not seem to be an Image" # pragma: no cover
10681072
logger_warning(msg, __name__)
1069-
extension, _, img = _xobj_to_image(self)
1073+
extension, _, img = _xobj_to_image(self, pillow_parameters)
10701074
if extension is None:
10711075
return None # pragma: no cover
10721076
return img

tests/test_images.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,17 @@ def test_extract_image_from_object(caplog):
474474
assert "does not seem to be an Image" in caplog.text
475475

476476

477+
def test_extract_image_with_explicit_quality():
478+
url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
479+
name = "iss2613.pdf"
480+
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
481+
image = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]["/Resources"]["/XObject"][
482+
"/X1"
483+
].decode_as_image({"quality": 25})
484+
assert isinstance(image, Image.Image)
485+
# Work-in-progress...
486+
487+
477488
@pytest.mark.enable_socket
478489
def test_4bits_images(caplog):
479490
url = "https://github.com/user-attachments/files/16624406/tt.pdf"

0 commit comments

Comments
 (0)