openzim · benoit74 · Sep 30, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,15 +8,15 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer
   - repo: https://github.com/psf/black
-    rev: '25.1.0'
+    rev: '25.9.0'
     hooks:
       - id: black
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.8
+    rev: v0.13.1
     hooks:
       - id: ruff
   - repo: https://github.com/RobertCraigie/pyright-python
-    rev: v1.1.400
+    rev: v1.1.405
     hooks:
       - id: pyright
         name: pyright (system)

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,18 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-### Added
+### Added
 
 - New `zim.dedup.Deduplicator` class to handle automatic deduplication of content before adding to the ZIM (#33)
 
 ### Changed
 
-- Upgrade dependencies, especially wombat 3.8.12 (#262)
+- Upgrade dependencies, especially wombat 3.9.1 (#262, #263)
 - Backport changes in wabac.js around JS rewriting rules (#259)
 
 ### Fixed
 
-- JS rewriting abusively rewrite import function (#255)
+- JS rewriting abusively rewrites import function (#255)
+- Test about badly escaped src in HTML is failing (#264)
 
 ### Added
 

diff --git a/openzim.toml b/openzim.toml
@@ -6,5 +6,5 @@ execute_after=[
 
 [files.assets.actions."wombat.js"]
 action="get_file"
-source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.8.12/dist/wombat.js"
+source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.9.1/dist/wombat.js"
 target_file="wombat.js"
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,19 +18,19 @@ dependencies = [
   "python-magic>=0.4.3,<0.5",
   "libzim>=3.4.0,<4.0",
   "beautifulsoup4>=4.9.3,<5.0",
-  "lxml>=4.6.3,<6.0",
+  "lxml>=4.6.3,<7.0",
   "optimize-images>=1.3.6,<2.0",
   # regex has no upper-bound due to "date-based" release numbers, no semver, so their
   # promise is that they will never (or always) break the API, and the API is very
   # limited and we use only a very small subset of it.
   "regex>=2020.7.14",
   "pymupdf>=1.24.0,<2.0",
   "CairoSVG>=2.2.0,<3.0",
-  "beartype>=0.19,<0.21",
+  "beartype>=0.19,<0.22",
   # youtube-dl should be updated as frequently as possible
   "yt-dlp",
   "pillow>=7.0.0,<12.0",
-  "urllib3>=1.26.5,<2.5.0",
+  "urllib3>=1.26.5,<2.6.0",
   "piexif==1.1.3", # this dep is a nightmare in terms of release management, better pinned just like in optimize-images anyway
   "idna>=2.5,<4.0",
   "xxhash>=2.0,<4.0",
@@ -58,30 +58,30 @@ scripts = [
 
 ]
 lint = [
-  "black==25.1.0",
-  "ruff==0.11.8",
+  "black==25.9.0",
+  "ruff==0.13.1",
 ]
 check = [
-  "pyright==1.1.400",
-  "pytest==8.3.5",
+  "pyright==1.1.405",
+  "pytest==8.4.2",
 ]
 test = [
-  "pytest==8.3.5",
-  "pytest-mock==3.14.0",
-  "coverage==7.8.0",
+  "pytest==8.4.2",
+  "pytest-mock==3.15.1",
+  "coverage==7.10.7",
 ]
 docs = [
   "mkdocs==1.6.1",
-  "mkdocstrings[python]==0.29.1",
-  "mkdocs-material==9.6.12",
-  "pymdown-extensions==10.15",
+  "mkdocs-include-markdown-plugin==7.1.7",
+  "mkdocs-material==9.6.20",
+  "mkdocstrings[python]==0.30.1",
+  "pymdown-extensions==10.16.1",
   "mkdocs-gen-files==0.5.0",
   "mkdocs-literate-nav==0.6.2",
-  "mkdocs-include-markdown-plugin==7.1.5",
 ]
 dev = [
-  "ipython==9.2.0",
-  "pre-commit==4.2.0",
+  "ipython==9.5.0",
+  "pre-commit==4.3.0",
   "zimscraperlib[scripts]",
   "zimscraperlib[lint]",
   "zimscraperlib[test]",

diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py
@@ -34,7 +34,9 @@ def shutdown(self) -> None:
         self.executor.shutdown(wait=True)
 
     def _run_youtube_dl(self, url: str, options: dict[str, Any]) -> None:
-        with youtube_dl.YoutubeDL(options) as ydl:
+        with youtube_dl.YoutubeDL(
+            options  # pyright: ignore[reportArgumentType]
+        ) as ydl:
             ydl.download([url])  # pyright: ignore[reportUnknownMemberType]
 
     def download(

diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py
@@ -164,6 +164,25 @@ def __eq__(self, value: object) -> bool:
             and self.native == getattr(value, "native", None)
         )
 
+    def __hash__(self) -> int:
+        """Hash built from the ISO codes and names (missing ones count as None)"""
+        # NOTE(review): attribute list assumed to mirror the one compared in
+        # __eq__ (only its last clause is visible in this hunk) - confirm
+        return hash(
+            tuple(
+                getattr(self, attr, None)
+                for attr in (
+                    "iso_639_1",
+                    "iso_639_2b",
+                    "iso_639_2t",
+                    "iso_639_3",
+                    "iso_639_5",
+                    "english",
+                    "native",
+                )
+            )
+        )
+
 
 def find_language_names(query: str) -> tuple[str, str]:
     """(native, english) language names for query"""

diff --git a/src/zimscraperlib/image/probing.py b/src/zimscraperlib/image/probing.py
@@ -5,9 +5,16 @@
 
 import colorthief  # pyright: ignore[reportMissingTypeStubs]
 import PIL.Image
+from PIL.Image import EXTENSION as PIL_FMT_EXTENSION
+from PIL.Image import init as init_pil
 
 from zimscraperlib.filesystem import get_content_mimetype, get_file_mimetype
 
+init_pil()  # populate the PIL_FMT_EXTENSION dictionary
+
+known_extensions = {".svg": "SVG"}
+known_extensions.update(PIL_FMT_EXTENSION)
+
 
 def get_colors(
     src: pathlib.Path, *, use_palette: bool | None = True
@@ -82,13 +89,6 @@ def format_for(
             "Cannot guess image format from file suffix when byte array is passed"
         )
 
-    from PIL.Image import EXTENSION as PIL_FMT_EXTENSION
-    from PIL.Image import init as init_pil
-
-    init_pil()  # populate the PIL_FMT_EXTENSION dictionary
-
-    known_extensions = {".svg": "SVG"}
-    known_extensions.update(PIL_FMT_EXTENSION)
     return known_extensions[src.suffix] if src.suffix in known_extensions else None
 
 

diff --git a/src/zimscraperlib/inputs.py b/src/zimscraperlib/inputs.py
@@ -2,7 +2,6 @@
 import shutil
 import tempfile
 from collections.abc import Iterable
-from typing import TypeVar
 
 from zimscraperlib import logger
 from zimscraperlib.constants import DEFAULT_USER_AGENT
@@ -14,8 +13,6 @@
 )
 from zimscraperlib.download import stream_file
 
-T = TypeVar("T")
-
 
 def handle_user_provided_file(
     source: pathlib.Path | str | None = None,
@@ -136,6 +133,6 @@ def compute_tags(
     }
 
 
-def unique_values(items: list[T]) -> list[T]:
+def unique_values[T](items: list[T]) -> list[T]:
     """Return unique values in input list while preserving list order"""
     return list(dict.fromkeys(items))
diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
@@ -381,6 +381,7 @@ def add_item_for(
     def add_item(  # pyright: ignore[reportIncompatibleMethodOverride]
         self,
         item: libzim.writer.Item,
+        *,
         duplicate_ok: bool | None = None,
         callbacks: list[Callback] | Callback | None = None,
     ):
@@ -417,6 +418,7 @@ def add_redirect(
         path: str,
         target_path: str,
         title: str | None = "",
+        *,
         is_front: bool | None = None,
         duplicate_ok: bool | None = None,
     ):

diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py
@@ -235,6 +235,7 @@ def __init__(
         title: str | None = None,
         mimetype: str | None = None,
         hints: dict[libzim.writer.Hint, int] | None = None,
+        *,
         use_disk: bool | None = None,
         **kwargs: Any,
     ):

diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
@@ -163,11 +163,6 @@ def get_libzim_value(self) -> bytes: ...
 # Alias for convenience when function accept any metadata
 AnyMetadata = MetadataBase[Any]
 
-# TypeVar bounded to subclasses of GenericMetadata, used by class decorators so that
-# they properly accommodate to the class they are used on while still knowing they have
-# access to all attributes of the MetadataBase class
-U = TypeVar("U", bound=AnyMetadata)
-
 
 def clean_str(value: str) -> str:
     """Clean a string value for unwanted control characters and strip white chars"""
@@ -179,47 +174,47 @@ def nb_grapheme_for(value: str) -> int:
     return len(regex.findall(r"\X", value))
 
 
-def mandatory(cls: type[U]):
+def mandatory[U: AnyMetadata](cls: type[U]):
     """Marks a Metadata mandatory: must be set to please Creator and cannot be empty"""
     cls.is_required = True
     cls.empty_allowed = False
     return cls
 
 
-def allow_empty(cls: type[U]):
+def allow_empty[U: AnyMetadata](cls: type[U]):
     """Whether input can be blank"""
     cls.empty_allowed = True
     return cls
 
 
-def allow_duplicates(cls: type[U]):
+def allow_duplicates[U: AnyMetadata](cls: type[U]):
     """Whether list input can accept duplicate values"""
     cls.duplicates_allowed = True
     return cls
 
 
-def deduplicate(cls: type[U]):
+def deduplicate[U: AnyMetadata](cls: type[U]):
     """Whether duplicates in list inputs should be reduced"""
     cls.duplicates_allowed = True
     cls.require_deduplication = True
     return cls
 
 
-def only_lang_codes(cls: type[U]):
-    """Whether list input should be checked to only accept ISO-639-1 codes"""
+def only_lang_codes[U: AnyMetadata](cls: type[U]):
+    """Whether list input should be checked to only accept ISO-639-3 codes"""
     cls.oz_only_iso636_3_allowed = True
     return cls
 
 
-def x_protected(cls: type[U]):
+def x_protected[U: AnyMetadata](cls: type[U]):
     """Whether metadata name should be checked for collision with reserved names
 
     when applying recommendations"""
     cls.oz_x_protected = True
     return cls
 
 
-def x_prefixed(cls: type[U]):
+def x_prefixed[U: AnyMetadata](cls: type[U]):
     """Whether metadata names should be automatically X-Prefixed"""
     cls.oz_x_protected = False
     cls.oz_x_prefixed = True

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -2,6 +2,8 @@
 
 import pytest
 
+from zimscraperlib.download import stream_file
+
 
 def pytest_addoption(parser: pytest.Parser):
     parser.addoption(
@@ -62,12 +64,12 @@ def timeout_url() -> str:
 
 @pytest.fixture(scope="module")
 def png_image_url() -> str:
-    return "https://commons.wikimedia.org/static/images/project-logos/commonswiki.png"
+    return "https://farm.openzim.org/assets/favicon-96x96.png"
 
 
 @pytest.fixture(scope="module")
 def gzip_html_url() -> str:
-    return "https://en.wikipedia.org/wiki/Main_Page"
+    return "https://kiwix.org/en"
 
 
 @pytest.fixture(scope="module")
@@ -162,7 +164,6 @@ def valid_user_agent():
 
 @pytest.fixture(scope="session")
 def small_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path:
-    from zimscraperlib.download import stream_file
 
     dst = pathlib.Path(tmpdir_factory.mktemp("data") / "small.zim")
     stream_file(
@@ -174,7 +175,6 @@ def small_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path:
 
 @pytest.fixture(scope="session")
 def ns_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path:
-    from zimscraperlib.download import stream_file
 
     dst = pathlib.Path(tmpdir_factory.mktemp("data") / "ns.zim")
     stream_file(
@@ -187,7 +187,6 @@ def ns_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path:
 
 @pytest.fixture(scope="session")
 def real_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path:
-    from zimscraperlib.download import stream_file
 
     dst = pathlib.Path(tmpdir_factory.mktemp("data") / "small.zim")
     stream_file(

diff --git a/tests/download/test_download.py b/tests/download/test_download.py
@@ -8,7 +8,7 @@
 import pytest
 import requests
 import requests.structures
-from yt_dlp import DownloadError  # pyright: ignore[reportMissingTypeStubs]
+from yt_dlp.utils import DownloadError
 
 from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT
 from zimscraperlib.download import (

diff --git a/tests/image/test_illustration.py b/tests/image/test_illustration.py
@@ -40,7 +40,7 @@ def test_get_zim_illustration(
 
 
 def test_get_missing_user_zim_illustration():
-    with pytest.raises(Exception, match="missing.png could not be found"):
+    with pytest.raises(Exception, match=r"missing\.png could not be found"):
         get_zim_illustration("./missing.png")