From 063571dead11599663b634e7c2a29b242c0c125e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20R=C3=BCdenauer?= Date: Fri, 20 Feb 2026 23:00:57 +0100 Subject: [PATCH 1/3] Add mypy configuration to pyproject.toml --- pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ba32343bc..fb976fa11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,15 @@ write_to = "src/moin/_version.py" line-length = 120 skip-magic-trailing-comma = true +[tool.mypy] +python_version = "3.10" +files = ["src"] +exclude = ["/_tests/"] +ignore_missing_imports = true +show_error_codes = true +pretty = true +strict = false + [tool.pytest.ini_options] norecursedirs = [".git", "_build", "tmp*", "env*", "dlc", "wiki", "support"] addopts = "--ignore=_ui_tests" From c91195aef5a302cbfd719fc12085dd9532cf7027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20R=C3=BCdenauer?= Date: Fri, 20 Feb 2026 23:02:06 +0100 Subject: [PATCH 2/3] Type hints: correct type hint of split_namespace --- src/moin/utils/names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/moin/utils/names.py b/src/moin/utils/names.py index 6198e249a..742701fcc 100644 --- a/src/moin/utils/names.py +++ b/src/moin/utils/names.py @@ -85,7 +85,7 @@ def parent_names(names: list[str]) -> set[str]: return parents -def split_namespace(namespaces: list[str], url: str) -> tuple[str, str]: +def split_namespace(namespaces: list[str] | set[str], url: str) -> tuple[str, str]: """ Find the longest namespace in the set. Namespaces are separated by slashes (/). 
From b1cdb0e74d89d0b60be3b12cbd519d3e6a71387a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20R=C3=BCdenauer?= Date: Thu, 22 Jan 2026 00:26:33 +0100 Subject: [PATCH 3/3] Simplify storage backend class hierarchy and add more type hints to moin configuration classes * merge MutableBackend with Backend (moin uses only mutable backends anyway) * type hinting: any resolved storage backend is known to derive from class BackendBase (independent from mutability) * add property 'read_only' to class BackendBase to indicate read-only backends * the routing backend now evaluates the 'read_only' property instead of checking if a backend instance is of type MutableBackend * get rid of mixin classes BytesMutableStoreMixin and FileMutableStoreMixin and their uses in stores (violating LSP) * move class ByteToStreamWrappingStore into test code * added class PasswordHasherConfig to provide PasswordHasher configuration values * add type hints to configuration classes * use class ConfigDataCache for cached configuration data (compiled regexes, password hasher) --- src/moin/app.py | 2 +- src/moin/cli/_util.py | 36 +- src/moin/config/__init__.py | 112 ++++-- src/moin/config/default.py | 377 +++++++++++------- src/moin/config/wikiconfig.py | 4 +- src/moin/storage/__init__.py | 39 +- src/moin/storage/backends/__init__.py | 48 ++- src/moin/storage/backends/_tests/__init__.py | 15 +- .../backends/_tests/test_fileserver.py | 1 + .../storage/backends/_tests/test_stores.py | 12 +- src/moin/storage/backends/_util.py | 11 +- src/moin/storage/backends/fileserver.py | 61 ++- src/moin/storage/backends/stores.py | 118 +++--- src/moin/storage/error.py | 6 + .../storage/middleware/_tests/test_routing.py | 28 +- .../middleware/_tests/test_serialization.py | 4 +- src/moin/storage/middleware/indexing.py | 60 +-- src/moin/storage/middleware/protecting.py | 47 ++- src/moin/storage/middleware/routing.py | 25 +- src/moin/storage/stores/__init__.py | 137 ++----- src/moin/storage/stores/_tests/conftest.py | 
2 +- .../storage/stores/{ => _tests}/wrappers.py | 0 src/moin/storage/stores/fs.py | 61 ++- src/moin/storage/stores/memory.py | 74 +++- src/moin/storage/stores/sqla.py | 110 +++-- src/moin/storage/stores/sqlite.py | 95 +++-- 26 files changed, 901 insertions(+), 584 deletions(-) rename src/moin/storage/stores/{ => _tests}/wrappers.py (100%) diff --git a/src/moin/app.py b/src/moin/app.py index 6646df9a0..de9c7aa84 100644 --- a/src/moin/app.py +++ b/src/moin/app.py @@ -236,7 +236,7 @@ def _init_backends(app, info_name, clock): logging.debug("Wiki index not found.") -def init_backends(app, create_backend=False): +def init_backends(app, create_backend: bool = False) -> None: """ initialize the backends """ diff --git a/src/moin/cli/_util.py b/src/moin/cli/_util.py index b7ed3e731..0bc5f0430 100644 --- a/src/moin/cli/_util.py +++ b/src/moin/cli/_util.py @@ -6,39 +6,43 @@ MoinMoin - common utilities for CLI commands. """ -from typing import Optional +from __future__ import annotations from flask import current_app as app -from moin.storage.backends.stores import Backend from moin import log +from moin.storage.backends.stores import Backend logging = log.getLogger(__name__) -def get_backends(backends: Optional[str], all_backends: bool) -> set[Backend]: - """Return a set of Backends for CLI parameters. +def get_backends(backends: str | None, all_backends: bool) -> set[Backend]: + """ + Return a set of Backends for CLI parameters. :param backends: Comma-separated list of backend names. :param all_backends: If True, include all backends (overrides 'backends'). 
""" if all_backends: return set(app.cfg.backend_mapping.values()) - if backends: - existing_backends = set(app.cfg.backend_mapping) - backends = set(backends.split(",")) - if backends.issubset(existing_backends): - return {app.cfg.backend_mapping.get(backend_name) for backend_name in backends} - else: - print("Error: Wrong backend name given.") - print("Given Backends: %r" % backends) - print("Configured Backends: %r" % existing_backends) - else: + + if not backends: logging.warning("no backends specified") return set() + existing_backends = set(app.cfg.backend_mapping) + requested_backends = set(backends.split(",")) + if not requested_backends.issubset(existing_backends): + print("Error: Wrong backend name given.") + print("Given Backends: %r" % backends) + print("Configured Backends: %r" % existing_backends) + return set() + + return {app.cfg.backend_mapping.get(backend_name) for backend_name in requested_backends} -def drop_and_recreate_index(indexer, procs=None, limitmb=None, multisegment=False): - """Drop the index and recreate, rebuild, and optimize it. + +def drop_and_recreate_index(indexer, procs=None, limitmb=None, multisegment: bool = False) -> None: + """ + Drop the index and recreate, rebuild, and optimize it. :param indexer: IndexingMiddleware object. :param procs: Number of processors the writer will use. 
diff --git a/src/moin/config/__init__.py b/src/moin/config/__init__.py index 56c1a03e6..c47932170 100644 --- a/src/moin/config/__init__.py +++ b/src/moin/config/__init__.py @@ -3,12 +3,18 @@ from __future__ import annotations -from typing import Any, Protocol, TypedDict -from typing_extensions import TypeAlias +from typing import Any, Protocol, runtime_checkable, TypeAlias, TypedDict, TYPE_CHECKING +from collections.abc import Callable + +from moin.datastructures.backends import BaseDictsBackend, BaseGroupsBackend + +if TYPE_CHECKING: + from moin.auth import BaseAuth + from moin.storage.backends import BackendBase NamespaceMapping: TypeAlias = list[tuple[str, str]] -BackendMapping: TypeAlias = dict[str, Any] +BackendMapping: TypeAlias = dict[str, "BackendBase"] ItemViews: TypeAlias = list[tuple[str, str, str, bool]] @@ -24,42 +30,86 @@ class AclConfig(TypedDict): AclMapping: TypeAlias = list[tuple[str, AclConfig]] +IndexStorageConfig: TypeAlias = tuple[str, tuple[str, Any], dict] + + +class PasswordHasherConfig(TypedDict): + time_cost: int + memory_cost: int + parallelism: int + hash_len: int + salt_len: int + +@runtime_checkable class WikiConfigProtocol(Protocol): - wikiconfig_dir: str - instance_dir: str - data_dir: str - index_storage: str - serve_files: dict[str, str] - template_dirs: list[str] - interwikiname: str - interwiki_map: dict[str, str] - sitename: str - edit_locking_policy: str - edit_lock_time: int - expanded_quicklinks_size: int - admin_emails: list[str] - email_tracebacks: bool - registration_only_by_superuser: bool - registration_hint: str - user_email_verification: bool acl_functions: str - uri: str - namespaces: dict[str, str] - backends: dict[str, str] + acl_mapping: AclMapping + acl_rights_contents: list[str] + acl_rights_functions: list[str] acls: dict[str, AclConfig] - namespace_mapping: NamespaceMapping + admin_emails: list[str] + auth: list[BaseAuth] + auth_can_logout: list[str] + auth_login_inputs: list[str] + auth_have_login: bool 
backend_mapping: BackendMapping - acl_mapping: AclMapping - root_mapping: dict[str, str] - default_root: str - language_default: str + backends: dict[str, str] + config_check_enabled: bool content_dir: str + content_security_policy: str + content_security_policy_report_only: str + content_security_policy_limit_per_day: int + contenttype_disabled: list[str] + contenttype_enabled: list[str] + data_dir: str + default_root: str + destroy_backend: bool + dicts: Callable[[], BaseDictsBackend] + edit_locking_policy: str + edit_lock_time: int + email_tracebacks: bool endpoints_excluded: list[str] + expanded_quicklinks_size: int + groups: Callable[[], BaseGroupsBackend] + index_storage: IndexStorageConfig + instance_dir: str + interwikiname: str + interwiki_map: dict[str, str] item_views: ItemViews - supplementation_item_names: list[str] + language_default: str + locale_default: str + mail_enabled: bool + mail_from: str | None + mail_username: str | None + mail_password: str | None + mail_sendmail: str | None + mail_smarthost: str | None + markdown_extensions: list[str] = [] + mimetypes_to_index_as_empty: list[str] = [] + namespace_mapping: NamespaceMapping + namespaces: dict[str, str] navi_bar: NaviBarEntries - auth_login_inputs: list[str] - auth_have_login: bool + registration_hint: str + registration_only_by_superuser: bool + root_mapping: dict[str, str] + secrets: dict[str, str] | str + serve_files: dict[str, str] show_hosts: bool + siteid: str + sitename: str + supplementation_item_names: list[str] + template_dirs: list[str] + theme_default: str + timezone_default: str + uri: str + user_defaults: dict[str, Any] + user_email_unique: bool + user_email_verification: bool + user_gravatar_default_img: str user_homewiki: str + user_use_gravatar: bool + wikiconfig_dir: str + + _plugin_modules: list[str] + _site_plugin_lists: dict[str, dict[str, str]] diff --git a/src/moin/config/default.py b/src/moin/config/default.py index 43484f474..4ead90161 100644 --- 
a/src/moin/config/default.py +++ b/src/moin/config/default.py @@ -11,54 +11,148 @@ MoinMoin - Configuration defaults class """ +from __future__ import annotations + import re import os +from typing import Any, TYPE_CHECKING, NamedTuple, TypedDict + from babel import Locale, parse_locale +from moin import log from moin.i18n import _, L_, N_ from moin import error +from moin.config import PasswordHasherConfig from moin.constants.rights import ACL_RIGHTS_CONTENTS, ACL_RIGHTS_FUNCTIONS from moin.constants.keys import * from moin.items.content import content_registry_enable, content_registry_disable from moin import datastructures from moin.auth import MoinAuth from moin.utils import plugins +from moin.utils.crypto import PasswordHasher from moin.security import AccessControlList, DefaultSecurityPolicy -from moin import log +if TYPE_CHECKING: + from collections.abc import Callable + from moin.auth import BaseAuth + from moin.config import AclConfig, AclMapping, BackendMapping, ItemViews, NamespaceMapping, NaviBarEntries + from moin.datastructures.backends import BaseDictsBackend, BaseGroupsBackend logging = log.getLogger(__name__) -class CacheClass: - """Just a container for stuff we cache.""" +class ConfigDataCache: + """ + Just a container for stuff we cache. 
+ """ + + pwd_hasher: PasswordHasher + + def __init__(self, config: ConfigFunctionality) -> None: + + try: + self.pwd_hasher = PasswordHasher(**config.password_hasher_config) + except (ValueError, TypeError) as err: + raise error.ConfigurationError(f"password_hasher_config configuration is invalid [{err}].") + + # pre-compile some regexes + self.item_dict_regex = re.compile(config.item_dict_regex, re.UNICODE) + self.item_group_regex = re.compile(config.item_group_regex, re.UNICODE) + + # the ..._regexact versions only match if nothing is left (exact match) + self.item_dict_regexact = re.compile(f"^{config.item_dict_regex}$", re.UNICODE) + self.item_group_regexact = re.compile(f"^{config.item_group_regex}$", re.UNICODE) - pass + # compiled functions ACL + self.acl_functions = AccessControlList([config.acl_functions], valid=config.acl_rights_functions) class ConfigFunctionality: - """Configuration base class with config class behavior. + """ + Configuration base class with config class behavior. This class contains the functionality for the DefaultConfig class for the benefit of the WikiConfig macro. """ - # attributes of this class that should not be shown - # in the WikiConfig() macro. 
- siteid = None - cache = None - mail_enabled = None - auth_can_logout = None - auth_have_login = None - auth_login_inputs = None - _site_plugin_lists = None - markdown_extensions = [] - - def __init__(self): - """Initialize Config instance.""" - self.cache = CacheClass() - + # fields dynamically added to the configuration via invocation of _add_options_to_defconfig at the end of this file + acl_functions: str + acl_mapping: AclMapping + acl_rights_contents: list[str] + acl_rights_functions: list[str] + acls: dict[str, AclConfig] + admin_emails: list[str] + auth: list[BaseAuth] + auth_can_logout: list[str] + auth_have_login: bool + auth_login_inputs: list[str] + backend_mapping: BackendMapping + backends: dict[str, str | None] + cache: ConfigDataCache + config_check_enabled: bool + content_dir: str + content_security_policy: str + content_security_policy_report_only: str + content_security_policy_limit_per_day: int + contenttype_disabled: list[str] + contenttype_enabled: list[str] + data_dir: str + default_root: str + destroy_backend: bool + dicts: Callable[[], BaseDictsBackend] + edit_lock_time: int + edit_locking_policy: str + email_tracebacks: bool + endpoints_excluded: list[str] + expanded_quicklinks_size: int + groups: Callable[[], BaseGroupsBackend] + index_storage: tuple[str, list[Any] | tuple[Any, ...], dict[str, Any]] + instance_dir: str + interwikiname: str + interwiki_map: dict[str, str] + item_dict_regex: str + item_group_regex: str + item_views: ItemViews + language_default: str + locale_default: str + mail_enabled: bool + mail_from: str | None + mail_password: str | None + mail_sendmail: str | None + mail_smarthost: str | None + mail_username: str | None + markdown_extensions: list[str] = [] + namespace_mapping: NamespaceMapping + namespaces: dict[str, str] + navi_bar: NaviBarEntries + password_hasher_config: PasswordHasherConfig + registration_hint: str + registration_only_by_superuser: bool + root_mapping: dict[str, str] + secrets: dict[str, str] | 
str + serve_files: dict[str, str] + show_hosts: bool + siteid: str + sitename: str + supplementation_item_names: list[str] + template_dirs: list[str] + theme_default: str + timezone_default: str + uri: str + user_defaults: dict[str, Any] + user_email_unique: bool + user_email_verification: bool + user_homewiki: str + wikiconfig_dir: str + + _plugin_modules: list[str] + _site_plugin_lists: dict[str, dict[str, str]] + + def __init__(self) -> None: + """ + Initialize Config instance. + """ if self.config_check_enabled: self._config_check() @@ -69,16 +163,8 @@ def __init__(self): # Try to decode certain names that allow Unicode self._decode() - # After that, pre-compile some regexes - self.cache.item_dict_regex = re.compile(self.item_dict_regex, re.UNICODE) - self.cache.item_group_regex = re.compile(self.item_group_regex, re.UNICODE) - - # the ..._regexact versions only match if nothing is left (exact match) - self.cache.item_dict_regexact = re.compile(f"^{self.item_dict_regex}$", re.UNICODE) - self.cache.item_group_regexact = re.compile(f"^{self.item_group_regex}$", re.UNICODE) - - # compiled functions ACL - self.cache.acl_functions = AccessControlList([self.acl_functions], valid=self.acl_rights_functions) + # After that, create our cache instance + self.cache = ConfigDataCache(self) plugins._loadPluginModule(self) @@ -150,7 +236,7 @@ def __init__(self): "(minimum length is {} chars)!".format(secret_min_length) ) # for lazy people: set all required secrets to same value - secrets = {} + secrets: dict[str, str] = {} for key in secret_key_names: secrets[key] = self.secrets self.secrets = secrets @@ -168,13 +254,6 @@ def __init__(self): ) ) - from moin.utils.crypto import PasswordHasher - - try: - self.cache.pwd_hasher = PasswordHasher(**self.password_hasher_config) - except (ValueError, TypeError) as err: - raise error.ConfigurationError(f"password_hasher_config configuration is invalid [{err}].") - if len(self.contenttype_enabled): 
content_registry_enable(self.contenttype_enabled) elif len(self.contenttype_disabled): @@ -265,8 +344,8 @@ def __getitem__(self, item): class DefaultConfig(ConfigFunctionality): - """Configuration base class with default config values - (added below) + """ + Configuration base class with default config values (added below) """ # Do not add anything into this class. Functionality must @@ -275,7 +354,9 @@ class DefaultConfig(ConfigFunctionality): # the options dictionary. -def _default_password_checker(cfg, username, password, min_length=8, min_different=5): +def _default_password_checker( + cfg, username: str, password: str, min_length: int = 8, min_different: int = 5 +) -> str | None: """Check if a password is secure enough. We use a built-in check to get rid of the worst passwords. @@ -321,9 +402,21 @@ def _default_password_checker(cfg, username, password, min_length=8, min_differe class DefaultExpression: - def __init__(self, exprstr): - self.text = exprstr - self.value = eval(exprstr) + def __init__(self, expr: str): + self.text = expr + self.value = eval(expr) + + +class Option(NamedTuple): + name: str + default_value: Any | None + description: str + + +class OptionsGroup(NamedTuple): + short_description: str + long_description: str | None + options: tuple[Option, ...] # @@ -331,20 +424,20 @@ def __init__(self, exprstr): # group name, see below (at the options dict) for more # information on the layout of this structure. 
# -options_no_group_name = { +options_no_group_name: dict[str, OptionsGroup] = { # ========================================================================== - "datastructures": ( + "datastructures": OptionsGroup( "Datastruct", None, ( # ('dicts', lambda cfg: datastructures.ConfigDicts({}), - ( + Option( "dicts", lambda cfg: datastructures.WikiDicts(), "function f(cfg) that returns a backend which is used to access dicts definitions.", ), # ('groups', lambda cfg: datastructures.ConfigGroups({}), - ( + Option( "groups", lambda cfg: datastructures.WikiGroups(), "function f(cfg) that returns a backend which is used to access groups definitions.", @@ -352,30 +445,32 @@ def __init__(self, exprstr): ), ), # ========================================================================== - "auth": ( + "auth": OptionsGroup( "Authentication / Authorization / Security", None, ( - ("auth", DefaultExpression("[MoinAuth()]"), "list of auth objects, to be called in order as specified"), - ( + Option( + "auth", DefaultExpression("[MoinAuth()]"), "list of auth objects, to be called in order as specified" + ), + Option( "secrets", None, """Either a long shared secret string used for multiple purposes or a dict {"purpose": "longsecretstring", ...} for setting up different shared secrets for different purposes.""", ), - ( + Option( "SecurityPolicy", DefaultSecurityPolicy, "Class object hook for implementing security restrictions or relaxations", ), - ("endpoints_excluded", [], "Exclude unwanted endpoints (list of strings)"), - ( + Option("endpoints_excluded", [], "Exclude unwanted endpoints (list of strings)"), + Option( "password_checker", DefaultExpression("_default_password_checker"), 'does simple checks whether a password is acceptable (you can switch this off by using "None" or enhance it by using a custom checker)', ), - ( + Option( "password_hasher_config", - dict( + PasswordHasherConfig( # Argon2id parameters for password hashing # time_cost: number of iterations (default: 2) 
time_cost=2, @@ -390,7 +485,7 @@ def __init__(self, exprstr): ), "Argon2 PasswordHasher configuration parameters", ), - ( + Option( "allow_style_attributes", False, "trust editors to not abuse style attribute security holes within HTML (CKEditor) or Markdown items", @@ -398,26 +493,26 @@ def __init__(self, exprstr): ), ), # ========================================================================== - "style": ( + "style": OptionsGroup( "Style / Theme / UI", "These settings control how the wiki user interface will look like.", ( - ( + Option( "sitename", "Untitled Wiki", "Short description of your wiki site, displayed below the logo on each page, and used in RSS documents as the channel title [Unicode]", ), - ( + Option( "interwikiname", None, "unique, stable and required InterWiki name (prefix, moniker) of the site [Unicode]", ), - ( + Option( "html_pagetitle", None, "Allows you to set a specific HTML page title (if None, it defaults to the value of 'sitename') [Unicode]", ), - ( + Option( "navi_bar", [ # cls, endpoint, args, link_text, title @@ -432,9 +527,9 @@ def __init__(self, exprstr): ], "Data to create the navi_bar from. Users can add more items in their quick links in user preferences. You need to configure a list of tuples (css_class, endpoint, args, label, title). Use L_() for translating. [list of tuples]", ), - ("expanded_quicklinks_size", 8, "Number of quicklinks to show as expanded in navi bar"), - ("theme_default", "topside", "Default theme."), - ( + Option("expanded_quicklinks_size", 8, "Number of quicklinks to show as expanded in navi bar"), + Option("theme_default", "topside", "Default theme."), + Option( "serve_files", {}, """ @@ -442,14 +537,16 @@ def __init__(self, exprstr): from the filesystem as url .../+serve//... 
""", ), - ( + Option( "supplementation_item_names", [_("Discussion")], "List of names of the supplementation (sub)items [Unicode]", ), - ("interwiki_preferred", [], "In dialogues, show those wikis at the top of the list [list of Unicode]."), - ("trail_size", 5, "Number of items in the trail of recently visited items"), - ( + Option( + "interwiki_preferred", [], "In dialogues, show those wikis at the top of the list [list of Unicode]." + ), + Option("trail_size", 5, "Number of items in the trail of recently visited items"), + Option( "item_views", [ # (endpointname, label, title, check_item_exists @@ -474,84 +571,84 @@ def __init__(self, exprstr): ], "list of edit bar entries (list of tuples (endpoint, label, title, exists))", ), - ("show_hosts", True, "if True, show host names and IPs. Set to False to hide them."), - ("show_interwiki", False, "if True, let the theme display your interwiki name"), - ("show_names", True, "if True, show user names in the revision history. Set to False to hide them."), - ("show_section_numbers", False, "show section numbers in headings by default"), - ("show_rename_redirect", False, "if True, offer creation of redirect pages when renaming wiki pages"), - ("template_dirs", [], "list of directories with templates that will override theme and base templates."), + Option("show_hosts", True, "if True, show host names and IPs. Set to False to hide them."), + Option("show_interwiki", False, "if True, let the theme display your interwiki name"), + Option("show_names", True, "if True, show user names in the revision history. Set to False to hide them."), + Option("show_section_numbers", False, "show section numbers in headings by default"), + Option("show_rename_redirect", False, "if True, offer creation of redirect pages when renaming wiki pages"), + Option( + "template_dirs", [], "list of directories with templates that will override theme and base templates." 
+ ), ), ), # ========================================================================== - "editor": ( + "editor": OptionsGroup( "Editor", None, ( - ( - "edit_locking_policy", - "lock", - "Editor locking policy: None or 'lock'", + Option( + "edit_locking_policy", "lock", "Editor locking policy: None or 'lock'" ), # 'warn' as in 1.9.x is not supported - ("edit_lock_time", 10, "Time, in minutes, to hold or renew edit lock at start of edit or preview"), + Option("edit_lock_time", 10, "Time, in minutes, to hold or renew edit lock at start of edit or preview"), # ('item_license', '', 'not used: maybe page_license_enabled from 1.9.x; if set, show the license item within the editor. [Unicode]'), # ('edit_ticketing', True, 'not used: maybe a remnant of https://moinmo.in/TicketSystem'), ), ), # ========================================================================== - "paging": ( + "paging": OptionsGroup( "Paging", None, - (("results_per_page", 50, "Number of results to be shown on a single page in pagination"),), + (Option("results_per_page", 50, "Number of results to be shown on a single page in pagination"),), ), # ========================================================================== - "data": ( + "data": OptionsGroup( "Data Storage", None, ( - ("data_dir", "./data/", "Path to the data directory."), - ("plugin_dirs", [], "Plugin directories."), - ("interwiki_map", {}, "Dictionary of wiki_name -> wiki_url"), - ( + Option("data_dir", "./data/", "Path to the data directory."), + Option("plugin_dirs", [], "Plugin directories."), + Option("interwiki_map", {}, "Dictionary of wiki_name -> wiki_url"), + Option( "namespace_mapping", None, "A list of tuples, each tuple containing: Namespace identifier, backend name. " + "E.g.: [('', 'default')), ].", ), - ( + Option( "backend_mapping", None, "A dictionary that maps backend names to backends. 
" + "E.g.: {'default': Backend(), }.", ), - ( + Option( "acl_mapping", None, "This needs to point to a list of tuples, each tuple containing: name prefix, acl protection to be applied to matching items. " + "E.g.: [('', dict(default='All:read,write,create,admin')), ].", ), - ("mimetypes_to_index_as_empty", [], "List of mimetypes which are indexed as though they were empty."), + Option("mimetypes_to_index_as_empty", [], "List of mimetypes which are indexed as though they were empty."), ), ), # ========================================================================== - "items": ( + "items": OptionsGroup( "Special Item Names", None, ( - ( + Option( "default_root", "Home", "Default root, use this value in case no match is found in root_mapping. [Unicode]", ), - ("root_mapping", {}, "mapping of namespaces to unique root items."), + Option("root_mapping", {}, "mapping of namespaces to unique root items."), # the following regexes should match the complete name when used in free text # the group 'all' shall match all, while the group 'key' shall match the key only # e.g. 
FooGroup -> group 'all' == FooGroup, group 'key' == Foo # moin's code will add ^ / $ at beginning / end when needed - ( + Option( "item_dict_regex", r"(?P(?P\S+)Dict)", "Item names exactly matching this regex are regarded as items containing variable dictionary definitions [Unicode]", ), - ( + Option( "item_group_regex", r"(?P(?P\S+)Group)", "Item names exactly matching this regex are regarded as items containing group definitions [Unicode]", @@ -559,11 +656,11 @@ def __init__(self, exprstr): ), ), # ========================================================================== - "user": ( + "user": OptionsGroup( "User Preferences", None, ( - ( + Option( "user_defaults", { NAME: [], @@ -602,16 +699,16 @@ def __init__(self, exprstr): ), ), # ========================================================================== - "various": ( + "various": OptionsGroup( "Various", None, ( # ('bang_meta', True, 'if True, enable {{{#!NoWikiName}}} markup'), - ("config_check_enabled", False, "if True, check configuration for unknown settings."), - ("timezone_default", "UTC", "Default time zone."), - ("locale_default", "en_US", "Default locale for user interface and content."), + Option("config_check_enabled", False, "if True, check configuration for unknown settings."), + Option("timezone_default", "UTC", "Default time zone."), + Option("locale_default", "en_US", "Default locale for user interface and content."), # ('log_remote_addr', True, "if True, log the remote IP address (and maybe hostname)."), - ( + Option( "log_reverse_dns_lookups", True, "if True, do a reverse DNS lookup on page SAVE. If your DNS is broken, set this to False to speed up SAVE.", @@ -619,27 +716,29 @@ def __init__(self, exprstr): # some dangerous mimetypes (we don't use "content-disposition: inline" for them when a user # downloads such data, because the browser might execute e.g. 
Javascript contained # in the HTML and steal your moin session cookie or do other nasty stuff) - ( + Option( "mimetypes_xss_protect", ["text/html", "application/x-shockwave-flash", "application/xhtml+xml"], '"content-disposition: inline" is not used for downloads of such data', ), # ('refresh', None, "refresh = (minimum_delay_s, targets_allowed) enables use of '#refresh 5 PageName' processing instruction, targets_allowed must be either 'internal' or 'external'"), - ("siteid", "MoinMoin", None), # XXX just default to some existing module name to + Option("siteid", "MoinMoin", "Id of the wiki site"), # XXX just default to some existing module name to # make plugin loader etc. work for now - ("contenttype_disabled", [], "List of disabled content types. Ignored if contenttype_enabled is set."), - ( + Option( + "contenttype_disabled", [], "List of disabled content types. Ignored if contenttype_enabled is set." + ), + Option( "contenttype_enabled", [], "List of available content types for new items. Default: [] (all types enabled).", ), - ("content_security_policy", "", "Content security policy."), - ( + Option("content_security_policy", "", "Content security policy."), + Option( "content_security_policy_report_only", "default-src 'self'; script-src 'self'; style-src 'self'; img-src 'self';", "Content security policy in report-only mode.", ), - ("content_security_policy_limit_per_day", 100, "Limit of reports logged per day."), + Option("content_security_policy_limit_per_day", 100, "Limit of reports logged per day."), ), ), } @@ -665,70 +764,69 @@ def __init__(self, exprstr): # you can use the DefaultExpression class, see 'auth' above for example. 
# # -options = { - "acl": ( + +options: dict[str, OptionsGroup] = { + "acl": OptionsGroup( "Access Control Lists", "ACLs control who may do what.", ( - ("functions", "", "Access Control List for functions."), - ("rights_contents", ACL_RIGHTS_CONTENTS, "Valid tokens for right sides of content ACL entries."), - ("rights_functions", ACL_RIGHTS_FUNCTIONS, "Valid tokens for right sides of function ACL entries."), + Option("functions", "", "Access Control List for functions."), + Option("rights_contents", ACL_RIGHTS_CONTENTS, "Valid tokens for right sides of content ACL entries."), + Option("rights_functions", ACL_RIGHTS_FUNCTIONS, "Valid tokens for right sides of function ACL entries."), ), ), - "ns": ( + "ns": OptionsGroup( "Storage Namespaces", "Storage namespaces can be defined for all sorts of data. " "All items sharing a common namespace as prefix are then stored within the same backend. " "The common prefix for all data is ''.", ( - ( - "content", - "/", - "All content is by default stored below /, hence the prefix is ''.", + Option( + "content", "/", "All content is by default stored below /, hence the prefix is ''." ), # Not really necessary. Just for completeness. - ( + Option( "user_profile", "userprofiles/", "User profiles (i.e. 
user data, not their homepage) are stored in this namespace.", ), - ("user_homepage", "users/", "All user homepages are stored in this namespace."), + Option("user_homepage", "users/", "All user homepages are stored in this namespace."), ), ), - "user": ( + "user": OptionsGroup( "User", None, ( - ("email_unique", True, "if True, check email addresses for uniqueness and don't accept duplicates."), - ( + Option("email_unique", True, "if True, check email addresses for uniqueness and don't accept duplicates."), + Option( "email_verification", False, "if True, require a new user to verify his or her email address before the first login.", ), - ( + Option( "homewiki", "Self", "interwiki name of the wiki where the user home pages are located [Unicode] - useful if you have ''many'' users. You could even link to nonwiki \"user pages\" if the wiki username is in the target URL.", ), - ("use_gravatar", False, "if True, gravatar.com will be used to find User's avatar"), - ("gravatar_default_img", "blank", "default image if email not registered at gravatar.com."), + Option("use_gravatar", False, "if True, gravatar.com will be used to find User's avatar"), + Option("gravatar_default_img", "blank", "default image if email not registered at gravatar.com."), ), ), - "mail": ( + "mail": OptionsGroup( "Mail", "These settings control outgoing email from the wiki.", ( - ("from", None, "Used as From: address for generated mail. [Unicode]"), - ("username", None, "Username for SMTP server authentication (None = don't use auth)."), - ("password", None, "Password for SMTP server authentication (None = don't use auth)."), - ("smarthost", None, "Address of SMTP server to use for sending mail (None = don't use SMTP server)."), + Option("from", None, "Used as From: address for generated mail. 
[Unicode]"), + Option("username", None, "Username for SMTP server authentication (None = don't use auth)."), + Option("password", None, "Password for SMTP server authentication (None = don't use auth)."), + Option("smarthost", None, "Address of SMTP server to use for sending mail (None = don't use SMTP server)."), ), ), - "registration": ( + "registration": OptionsGroup( "Registration", "These settings control registration options", ( - ("only_by_superuser", False, "True is recommended value for public wikis on the internet."), - ( + Option("only_by_superuser", False, "True is recommended value for public wikis on the internet."), + Option( "hint", _("To request an account, see bottom of Home page."), "message on login page when only_by_superuser is True", @@ -738,10 +836,9 @@ def __init__(self, exprstr): } -def _add_options_to_defconfig(opts, addgroup=True): - for groupname in opts: - group_short, group_doc, group_opts = opts[groupname] - for name, default, doc in group_opts: +def _add_options_to_defconfig(opts: dict[str, OptionsGroup], addgroup: bool = True) -> None: + for groupname, options_group in opts.items(): + for name, default, _doc in options_group.options: if addgroup: name = groupname + "_" + name if isinstance(default, DefaultExpression): diff --git a/src/moin/config/wikiconfig.py b/src/moin/config/wikiconfig.py index 79128f9c0..4c3a3ebd6 100644 --- a/src/moin/config/wikiconfig.py +++ b/src/moin/config/wikiconfig.py @@ -34,6 +34,7 @@ import os +from moin.config import AclConfig from moin.config.default import DefaultConfig from moin.utils import get_xstatic_module_path_map from moin.utils.interwiki import InterWikiMap @@ -45,7 +46,6 @@ NAMESPACE_HELP_COMMON, NAMESPACE_HELP_EN, ) -from moin.config import AclConfig class Config(DefaultConfig): @@ -179,7 +179,7 @@ class Config(DefaultConfig): # custom namespace with a separate backend (a wiki/data/bar directory will be created) # 'bar': 'bar', } - backends = { + backends: dict[str, str | None] = { # maps 
backend name -> storage # all values in `namespaces` dict must be defined as keys in `backends` dict "default": uri, diff --git a/src/moin/storage/__init__.py b/src/moin/storage/__init__.py index a15f73083..033b9ad7a 100644 --- a/src/moin/storage/__init__.py +++ b/src/moin/storage/__init__.py @@ -30,7 +30,8 @@ NAMESPACE_HELP_COMMON, NAMESPACE_HELP_EN, ) -from moin.config import AclConfig +from moin.config import AclConfig, AclMapping, BackendMapping, NamespaceMapping +from moin.storage.backends import BackendBase BACKENDS_PACKAGE = "moin.storage.backends" @@ -41,7 +42,7 @@ BACKEND_HELP_EN = "help-en" -def backend_from_uri(uri: str): +def backend_from_uri(uri: str) -> BackendBase: """ Create a backend instance for a URI. """ @@ -49,11 +50,13 @@ def backend_from_uri(uri: str): if len(backend_name_uri) != 2: raise ValueError(f"malformed backend URI: {uri}") backend_name, backend_uri = backend_name_uri - module = __import__(BACKENDS_PACKAGE + "." + backend_name, globals(), locals(), ["MutableBackend"]) - return module.MutableBackend.from_uri(backend_uri) + module = __import__(BACKENDS_PACKAGE + "." 
+ backend_name, globals(), locals(), ["Backend"]) + return module.Backend.from_uri(backend_uri) -def create_mapping(uri: str, namespaces: dict[str, str], backends: dict[str, str], acls: dict[str, AclConfig]): +def create_mapping( + uri: str, namespaces: dict[str, str], backends: dict[str, str | None], acls: dict[str, AclConfig] +) -> tuple[NamespaceMapping, BackendMapping, AclMapping]: # TODO "or uri" can be removed in the future, see TODO in config/wikiconfig.py backend_mapping = [ (backend_name, backend_from_uri((backends[backend_name] or uri) % dict(backend=backend_name, kind="%(kind)s"))) @@ -66,13 +69,13 @@ def create_mapping(uri: str, namespaces: dict[str, str], backends: dict[str, str def create_simple_mapping( - uri="stores:fs:instance", - default_acl=None, - userprofiles_acl=None, - users_acl=None, - help_common_acl=None, - help_en_acl=None, -): + uri: str = "stores:fs:instance", + default_acl: AclConfig | None = None, + userprofiles_acl: AclConfig | None = None, + users_acl: AclConfig | None = None, + help_common_acl: AclConfig | None = None, + help_en_acl: AclConfig | None = None, +) -> tuple[NamespaceMapping, BackendMapping, AclMapping]: """ When configuring storage, the admin needs to provide a namespace_mapping. 
To ease creation of such a mapping, this function provides sane defaults @@ -98,15 +101,15 @@ def create_simple_mapping( """ # if no acls are given, use something mostly harmless: if not default_acl: - default_acl = dict(before="", default="All:read,write,create,admin", after="", hierarchic=False) + default_acl = AclConfig(before="", default="All:read,write,create,admin", after="", hierarchic=False) if not userprofiles_acl: - userprofiles_acl = dict(before="All:", default="", after="", hierarchic=False) + userprofiles_acl = AclConfig(before="All:", default="", after="", hierarchic=False) if not users_acl: - users_acl = dict(before="", default="All:read,write,create,admin", after="", hierarchic=False) + users_acl = AclConfig(before="", default="All:read,write,create,admin", after="", hierarchic=False) if not help_common_acl: - help_common_acl = dict(before="", default="All:read,write,create,admin", after="", hierarchic=False) + help_common_acl = AclConfig(before="", default="All:read,write,create,admin", after="", hierarchic=False) if not help_en_acl: - help_en_acl = dict(before="", default="All:read,write,create,admin", after="", hierarchic=False) + help_en_acl = AclConfig(before="", default="All:read,write,create,admin", after="", hierarchic=False) namespaces = { NAMESPACE_DEFAULT: BACKEND_DEFAULT, NAMESPACE_USERPROFILES: BACKEND_USERPROFILES, @@ -114,7 +117,7 @@ def create_simple_mapping( NAMESPACE_HELP_COMMON: BACKEND_HELP_COMMON, NAMESPACE_HELP_EN: BACKEND_HELP_EN, } - backends = { + backends: dict[str, str | None] = { BACKEND_DEFAULT: None, BACKEND_USERPROFILES: None, BACKEND_USERS: None, diff --git a/src/moin/storage/backends/__init__.py b/src/moin/storage/backends/__init__.py index bb52c607b..e32555a94 100644 --- a/src/moin/storage/backends/__init__.py +++ b/src/moin/storage/backends/__init__.py @@ -6,6 +6,11 @@ MoinMoin - backend base classes. 
""" +from __future__ import annotations + +from typing import Any, Iterator +from typing_extensions import Self + from abc import abstractmethod, ABCMeta @@ -14,63 +19,64 @@ class BackendBase(metaclass=ABCMeta): Tie together a store for metadata and a store for data, read-only. """ + @property + @abstractmethod + def read_only(self) -> bool: + """ + Indicates if the backend is read-only or not. + """ + @classmethod @abstractmethod - def from_uri(cls, uri): + def from_uri(cls, uri: str) -> Self: """ Create an instance using the data given in the URI. """ @abstractmethod - def open(self): + def create(self) -> None: """ - Open the backend; allocate resources. + Create the backend. """ @abstractmethod - def close(self): + def destroy(self) -> None: """ - Close the backend; free resources (except the stored meta/data!). + Destroy the backend; erase all meta/data it contains. """ @abstractmethod - def __iter__(self): + def open(self) -> None: """ - Iterate over meta IDs. + Open the backend; allocate resources. """ @abstractmethod - def retrieve(self, metaid): + def close(self) -> None: """ - Return meta and data related to metaid. + Close the backend; free resources (except the stored meta/data!). """ - -class MutableBackendBase(BackendBase): - """ - Same as Backend, but read/write. - """ - @abstractmethod - def create(self): + def __iter__(self) -> Iterator[str]: """ - Create the backend. + Iterate over meta IDs. """ @abstractmethod - def destroy(self): + def retrieve(self, metaid: str) -> Any: """ - Destroy the backend; erase all meta/data it contains. + Return meta and data related to metaid. """ @abstractmethod - def store(self, meta, data): + def store(self, meta, data) -> str: """ Store meta and data into the backend; return the metaid. """ @abstractmethod - def remove(self, metaid, destroy_data=False): + def remove(self, metaid: str, destroy_data: bool = False) -> None: """ Delete meta and data related to metaid from the backend. 
""" diff --git a/src/moin/storage/backends/_tests/__init__.py b/src/moin/storage/backends/_tests/__init__.py index 456bd37c0..419eb5f7e 100644 --- a/src/moin/storage/backends/_tests/__init__.py +++ b/src/moin/storage/backends/_tests/__init__.py @@ -5,17 +5,25 @@ MoinMoin - backend tests. """ -from io import BytesIO +from typing import Protocol import pytest +from io import BytesIO + from moin.constants.keys import SIZE, HASH_ALGORITHM +from moin.storage.backends import BackendBase + +class WithBE(Protocol): + be: BackendBase + + +class BackendTestBase(WithBE): -class BackendTestBase: def setup_method(self, method): """ - self.be needs to be an opened backend. + self.be needs to be a created an opened backend. """ raise NotImplementedError @@ -35,6 +43,7 @@ def test_iter(self): class MutableBackendTestBase(BackendTestBase): + def setup_method(self, method): """ self.be needs to be a created and opened backend. diff --git a/src/moin/storage/backends/_tests/test_fileserver.py b/src/moin/storage/backends/_tests/test_fileserver.py index ac4825091..06b60ee09 100644 --- a/src/moin/storage/backends/_tests/test_fileserver.py +++ b/src/moin/storage/backends/_tests/test_fileserver.py @@ -14,6 +14,7 @@ class TestFileServerBackend(BackendTestBase): + def setup_method(self, method): self.path = path = tempfile.mkdtemp() self.be = Backend(path) diff --git a/src/moin/storage/backends/_tests/test_stores.py b/src/moin/storage/backends/_tests/test_stores.py index 3d3c10923..04085a40e 100644 --- a/src/moin/storage/backends/_tests/test_stores.py +++ b/src/moin/storage/backends/_tests/test_stores.py @@ -11,9 +11,7 @@ import os import tempfile -from ..stores import MutableBackend -from . 
import MutableBackendTestBase - +from moin.storage.backends.stores import Backend from moin.storage.stores.memory import BytesStore as MemoryBytesStore from moin.storage.stores.memory import FileStore as MemoryFileStore from moin.storage.stores.fs import BytesStore as FSBytesStore @@ -21,12 +19,14 @@ from moin.storage.stores.sqla import BytesStore as SQLABytesStore from moin.storage.stores.sqla import FileStore as SQLAFileStore +from . import MutableBackendTestBase + class TestMemoryBackend(MutableBackendTestBase): def setup_method(self, method): meta_store = MemoryBytesStore() data_store = MemoryFileStore() - self.be = MutableBackend(meta_store, data_store) + self.be = Backend(meta_store, data_store) self.be.create() self.be.open() @@ -39,7 +39,7 @@ def setup_method(self, method): data_path = tempfile.mkdtemp() os.rmdir(data_path) data_store = FSFileStore(data_path) - self.be = MutableBackend(meta_store, data_store) + self.be = Backend(meta_store, data_store) self.be.create() self.be.open() @@ -50,6 +50,6 @@ def setup_method(self, method): data_path = tempfile.mktemp() meta_store = SQLABytesStore(f"sqlite:///{meta_path}") data_store = SQLAFileStore(f"sqlite:///{data_path}") - self.be = MutableBackend(meta_store, data_store) + self.be = Backend(meta_store, data_store) self.be.create() self.be.open() diff --git a/src/moin/storage/backends/_util.py b/src/moin/storage/backends/_util.py index 7f6095d77..e4ec3c3d4 100644 --- a/src/moin/storage/backends/_util.py +++ b/src/moin/storage/backends/_util.py @@ -6,7 +6,10 @@ MoinMoin - backend utilities. """ +from __future__ import annotations +from typing import Any import hashlib +from io import BytesIO class TrackingFileWrapper: @@ -19,7 +22,7 @@ class TrackingFileWrapper: The hash property is the hash instance; call hash.hexdigest() if needed. 
""" - def __init__(self, realfile, hash_method="sha1"): + def __init__(self, realfile: BytesIO, hash_method: str = "sha1") -> None: self._realfile = realfile self._read = realfile.read self._hash = hashlib.new(hash_method) @@ -30,7 +33,7 @@ def __init__(self, realfile, hash_method="sha1"): if fpos: raise ValueError("file must be at position 0") - def read(self, size=-1): + def read(self, size: int = -1) -> bytes: data = self._read(size) if not data or size == -1: self._finished = True @@ -39,13 +42,13 @@ def read(self, size=-1): return data @property - def size(self): + def size(self) -> int: if not self._finished: raise AttributeError("Do not access the size attribute before having read all data") return self._size @property - def hash(self): + def hash(self) -> Any: if not self._finished: raise AttributeError("Do not access the hash attribute before having read all data") return self._hash diff --git a/src/moin/storage/backends/fileserver.py b/src/moin/storage/backends/fileserver.py index 64a7e695d..09eadc3ce 100644 --- a/src/moin/storage/backends/fileserver.py +++ b/src/moin/storage/backends/fileserver.py @@ -15,8 +15,8 @@ from __future__ import annotations -from typing import Any -from typing_extensions import override +from typing import Any, Iterator +from typing_extensions import override, Self import os import errno @@ -26,9 +26,9 @@ from urllib.parse import unquote as url_unquote from moin.constants.keys import NAME, ITEMID, REVID, MTIME, SIZE, CONTENTTYPE, HASH_ALGORITHM -from . import BackendBase - +from moin.storage.error import ReadOnlyBackendError from moin.utils.mimetype import MimeType +from . 
import BackendBase NAME_SEP = "/" @@ -40,24 +40,37 @@ class Backend(BackendBase): @override @classmethod - def from_uri(cls, uri): + def from_uri(cls, uri: str) -> Self: return cls(uri) - def __init__(self, path): + def __init__(self, path: str | os.PathLike) -> None: """ :param path: base directory (all files/dirs below will be exposed) """ self.path = str(path) + @property + @override + def read_only(self) -> bool: + return True + + @override + def create(self) -> None: + raise ReadOnlyBackendError("backend is read-only!") + @override - def open(self): + def destroy(self) -> None: + raise ReadOnlyBackendError("backend is read-only!") + + @override + def open(self) -> None: pass @override - def close(self): + def close(self) -> None: pass - def _mkpath(self, key) -> tuple[str, str]: + def _mkpath(self, key: str) -> tuple[str, str]: """ key -> itemname, absolute path (strip mtime) """ @@ -76,7 +89,7 @@ def _mkpath(self, key) -> tuple[str, str]: relpath = itemname.replace(NAME_SEP, os.sep) return itemname, os.path.join(self.path, relpath) - def _mkkey(self, path) -> tuple[str, int]: + def _mkkey(self, path: str) -> tuple[str, int]: """ absolute path -> itemname, mtime """ @@ -92,14 +105,14 @@ def _mkkey(self, path) -> tuple[str, int]: mtime = int(st.st_mtime) return itemname, mtime - def _encode(self, key): + def _encode(self, key: str) -> str: """ we need to get rid of slashes in revids because we put them into URLs and it would confuse the URL routing. 
""" return url_quote(key, safe="") - def _decode(self, qkey): + def _decode(self, qkey: str) -> str: return url_unquote(qkey) def _get_meta(self, itemname, path) -> dict[str, Any]: @@ -143,11 +156,11 @@ def _make_directory_page(self, path: str) -> str: dirs.append(name) else: files.append(name) - content = ["= Directory contents =", " * [[../]]"] - content.extend(f" * [[/{name}|{name}/]]" for name in sorted(dirs)) - content.extend(f" * [[/{name}|{name}]]" for name in sorted(files)) - content.append("") - content = "\r\n".join(content) + lines = ["= Directory contents =", " * [[../]]"] + lines.extend(f" * [[/{name}|{name}/]]" for name in sorted(dirs)) + lines.extend(f" * [[/{name}|{name}]]" for name in sorted(files)) + lines.append("") + content = "\r\n".join(lines) except OSError as err: content = str(err) return content @@ -168,7 +181,7 @@ def _get_data(self, itemname: str, path: str) -> BufferedReader | BytesIO: raise @override - def __iter__(self): + def __iter__(self) -> Iterator[str]: # Note: Instead of just yielding the relative , yield ., # so if the file is updated, the revid will change (and the indexer's # update() method can efficiently update the index). 
@@ -180,9 +193,17 @@ def __iter__(self): yield self._encode("%s.%d" % self._mkkey(os.path.join(dirpath, filename))) @override - def retrieve(self, key) -> tuple[Any, Any]: - key = self._decode(key) + def retrieve(self, metaid: str) -> tuple[Any, Any]: + key = self._decode(metaid) itemname, path = self._mkpath(key) meta = self._get_meta(itemname, path) data = self._get_data(itemname, path) return meta, data + + @override + def store(self, meta: dict[str, Any], data) -> str: + raise ReadOnlyBackendError("backend is read-only!") + + @override + def remove(self, metaid: str, destroy_data: bool = False) -> None: + raise ReadOnlyBackendError("backend is read-only!") diff --git a/src/moin/storage/backends/stores.py b/src/moin/storage/backends/stores.py index 9ded11b9c..4b8963924 100644 --- a/src/moin/storage/backends/stores.py +++ b/src/moin/storage/backends/stores.py @@ -22,18 +22,24 @@ import json +from typing import Any, Iterator, TYPE_CHECKING from typing_extensions import override from moin import log from moin.constants.keys import REVID, DATAID, SIZE, HASH_ALGORITHM, NAME, NAMESPACE +from moin.storage.error import ReadOnlyBackendError +from moin.storage.stores import BytesStoreBase, FileStoreBase from moin.storage.types import MetaData from moin.utils.crypto import make_uuid -from . import BackendBase, MutableBackendBase +from . import BackendBase from ._util import TrackingFileWrapper STORES_PACKAGE = "moin.storage.stores" +if TYPE_CHECKING: + from typing_extensions import Self + logger = log.getLogger(__name__) @@ -46,12 +52,11 @@ def item_name_from_metadata(meta: MetaData) -> str: class Backend(BackendBase): """ - Tie together a store for metadata and a store for data, read-only. + Tie together a store for metadata and a store for data. 
""" - @override @classmethod - def from_uri(cls, uri): + def from_uri(cls, uri: str) -> Self: store_name_uri = uri.split(":", 1) if len(store_name_uri) != 2: raise ValueError(f"malformed store uri: {uri}") @@ -61,70 +66,67 @@ def from_uri(cls, uri): data_store_uri = store_uri % dict(kind="data") return cls(module.BytesStore.from_uri(meta_store_uri), module.FileStore.from_uri(data_store_uri)) - def __init__(self, meta_store, data_store): + def __init__(self, meta_store: BytesStoreBase, data_store: FileStoreBase, read_only: bool = False): """ :param meta_store: a ByteStore for metadata :param data_store: a FileStore for data + :param read_only: indicates if the backend is read-only or not """ self.meta_store = meta_store self.data_store = data_store + self._read_only = read_only + + def _ensure_mutable(self) -> None: + if self.read_only: + raise ReadOnlyBackendError("backend is read-only!") + + @property + @override + def read_only(self) -> bool: + return self._read_only + + @override + def create(self) -> None: + self._ensure_mutable() + self.meta_store.create() + self.data_store.create() + + @override + def destroy(self) -> None: + self._ensure_mutable() + self.meta_store.destroy() + self.data_store.destroy() @override - def open(self): + def open(self) -> None: self.meta_store.open() self.data_store.open() @override - def close(self): + def close(self) -> None: self.meta_store.close() self.data_store.close() @override - def __iter__(self): + def __iter__(self) -> Iterator[str]: yield from self.meta_store - def _deserialize(self, meta_str): - text = meta_str.decode("utf-8") - meta = json.loads(text) - return meta + def _serialize(self, meta) -> bytes: + text = json.dumps(meta, ensure_ascii=False) + return text.encode("utf-8") + + def _deserialize(self, data: bytes) -> dict[str, Any]: + text = data.decode("utf-8") + return json.loads(text) - def _get_meta(self, metaid): + def _get_meta(self, metaid: str): meta = self.meta_store[metaid] return self._deserialize(meta) 
def _get_data(self, dataid): - data = self.data_store[dataid] - return data - - @override - def retrieve(self, metaid): - meta = self._get_meta(metaid) - dataid = meta[DATAID] - data = self._get_data(dataid) - return meta, data - - -class MutableBackend(Backend, MutableBackendBase): - """ - same as Backend, but read/write - """ + return self.data_store[dataid] - @override - def create(self): - self.meta_store.create() - self.data_store.create() - - @override - def destroy(self): - self.meta_store.destroy() - self.data_store.destroy() - - def _serialize(self, meta): - text = json.dumps(meta, ensure_ascii=False) - meta_str = text.encode("utf-8") - return meta_str - - def _store_meta(self, meta): + def _store_meta(self, meta: MetaData) -> str: try: # Item.clear_revision calls us with REVID already present metaid = meta[REVID] @@ -133,15 +135,30 @@ def _store_meta(self, meta): self.meta_store[metaid] = self._serialize(meta) return metaid + def _del_meta(self, metaid: str): + del self.meta_store[metaid] + + def _del_data(self, dataid: str): + del self.data_store[dataid] + + @override + def retrieve(self, metaid: str) -> tuple[MetaData, Any]: + meta = self._get_meta(metaid) + dataid = meta[DATAID] + data = self._get_data(dataid) + return meta, data + @override - def store(self, meta, data): + def store(self, meta: MetaData, data) -> str: + self._ensure_mutable() + try: dataid = meta[DATAID] except KeyError: dataid = make_uuid() tfw = TrackingFileWrapper(data, hash_method=HASH_ALGORITHM) - self.data_store[dataid] = tfw + self.data_store[dataid] = tfw # type: ignore meta[DATAID] = dataid # check whether size is consistent: @@ -169,14 +186,9 @@ def store(self, meta, data): return self._store_meta(meta) - def _del_meta(self, metaid): - del self.meta_store[metaid] - - def _del_data(self, dataid): - del self.data_store[dataid] - @override - def remove(self, metaid, destroy_data): + def remove(self, metaid: str, destroy_data: bool = False) -> None: + self._ensure_mutable() meta 
= self._get_meta(metaid) dataid = meta[DATAID] self._del_meta(metaid) diff --git a/src/moin/storage/error.py b/src/moin/storage/error.py index a73747a60..4732a7c4e 100644 --- a/src/moin/storage/error.py +++ b/src/moin/storage/error.py @@ -22,6 +22,12 @@ class BackendError(StorageError): """ +class ReadOnlyBackendError(BackendError): + """ + Raised if a mutating operation on a read-only backend was attempted. + """ + + class NoSuchItemError(BackendError): """ Raised if the requested item does not exist. diff --git a/src/moin/storage/middleware/_tests/test_routing.py b/src/moin/storage/middleware/_tests/test_routing.py index 73f37e3d5..066b2d211 100644 --- a/src/moin/storage/middleware/_tests/test_routing.py +++ b/src/moin/storage/middleware/_tests/test_routing.py @@ -13,18 +13,26 @@ from ..routing import Backend as RoutingBackend -from moin.storage.backends.stores import MutableBackend as StoreBackend, Backend as ROBackend +from moin.storage.backends.stores import Backend as StoreBackend +from moin.storage.error import ReadOnlyBackendError from moin.storage.stores.memory import BytesStore as MemoryBytesStore from moin.storage.stores.memory import FileStore as MemoryFileStore def make_ro_backend(): - store = StoreBackend(MemoryBytesStore(), MemoryFileStore()) - store.create() - store.open() - store.store({NAME: "test"}, BytesIO(b"")) - store.store({NAME: "test2"}, BytesIO(b"")) - return ROBackend(store.meta_store, store.data_store) + meta_store = MemoryBytesStore() + data_store = MemoryFileStore() + # create a mutable backend + backend = StoreBackend(meta_store, data_store) + backend.create() + backend.open() + backend.store({NAME: "test"}, BytesIO(b"")) + backend.store({NAME: "test2"}, BytesIO(b"")) + backend.close() + # create a read-only backend + backend = StoreBackend(meta_store, data_store, read_only=True) + backend.open() + return backend @pytest.fixture @@ -66,15 +74,15 @@ def test_store_get_del(router): def test_store_readonly_fails(router): - with 
pytest.raises(TypeError): + with pytest.raises(ReadOnlyBackendError): router.store(dict(name=["ro:testing"]), BytesIO(b"")) def test_del_readonly_fails(router): ro_be_name, ro_id = next(iter(router)) # we have only readonly items print(ro_be_name, ro_id) - with pytest.raises(TypeError): - router.remove(ro_be_name, ro_id) + with pytest.raises(ReadOnlyBackendError): + router.remove(ro_be_name, ro_id, destroy_data=True) def test_destroy_create_dont_touch_ro(router): diff --git a/src/moin/storage/middleware/_tests/test_serialization.py b/src/moin/storage/middleware/_tests/test_serialization.py index 3d2dfc438..c9a0a5543 100644 --- a/src/moin/storage/middleware/_tests/test_serialization.py +++ b/src/moin/storage/middleware/_tests/test_serialization.py @@ -16,7 +16,7 @@ from moin.constants.keys import NAME, CONTENTTYPE from moin.constants.namespaces import NAMESPACE_DEFAULT -from moin.storage.backends.stores import MutableBackend +from moin.storage.backends.stores import Backend from moin.storage.stores.memory import BytesStore, FileStore contents = [ @@ -51,7 +51,7 @@ def make_middleware(request: pytest.FixtureRequest, tmpdir): # Scenario meta_store = BytesStore() data_store = FileStore() - _backend = MutableBackend(meta_store, data_store) + _backend = Backend(meta_store, data_store) namespaces = [(NAMESPACE_DEFAULT, "backend")] backends = {"backend": _backend} backend = RoutingBackend(namespaces, backends) diff --git a/src/moin/storage/middleware/indexing.py b/src/moin/storage/middleware/indexing.py index 44ab98a64..41540e8e4 100644 --- a/src/moin/storage/middleware/indexing.py +++ b/src/moin/storage/middleware/indexing.py @@ -50,7 +50,7 @@ from __future__ import annotations -from typing import Any, Iterator, TYPE_CHECKING +from typing import Any, Generator, Iterator, TYPE_CHECKING import gc import io @@ -672,7 +672,7 @@ def _modify_index( else: raise ValueError(f"mode must be 'update', 'add' or 'delete', not '{mode}'") - def _find_latest_backends_revids(self, index, 
query=None): + def _find_latest_backends_revids(self, index: FileIndex, query=None) -> list[tuple[str, str]]: """ find the latest revision identifiers using the all-revs index @@ -830,7 +830,7 @@ def dump(self, tmp=False, idx_name=LATEST_REVS): finally: ix.close() - def query_parser(self, default_fields, idx_name=LATEST_REVS): + def query_parser(self, default_fields: list[str], idx_name: str = LATEST_REVS) -> QueryParser: """ Build a query parser for a list of default fields. """ @@ -843,7 +843,7 @@ def query_parser(self, default_fields, idx_name=LATEST_REVS): raise ValueError("default_fields list must at least contain one field name") qp.add_plugin(RegexPlugin()) - def userid_pseudo_field_factory(fieldname): + def userid_pseudo_field_factory(fieldname: str): """generate a translator function, that searches for the userid in the given fieldname when provided with the username """ @@ -872,7 +872,7 @@ def userid_pseudo_field(node): ) return qp - def search(self, q, idx_name=LATEST_REVS, **kw): + def search(self, q, idx_name: str = LATEST_REVS, **kw) -> Generator[Revision]: """ Search with query q, yield Revisions. """ @@ -885,7 +885,7 @@ def search(self, q, idx_name=LATEST_REVS, **kw): item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) yield item.get_revision(doc[REVID], doc=doc) - def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): + def search_page(self, q, idx_name: str = LATEST_REVS, pagenum=1, pagelen=10, **kw) -> Generator[Revision]: """ Same as search, but with paging support. """ @@ -898,7 +898,7 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) yield item.get_revision(doc[REVID], doc=doc) - def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): + def search_meta(self, q, idx_name: str = LATEST_REVS, regex=None, **kw) -> Generator[MetaData]: """ Search with query q, yield Revision metadata from index. 
""" @@ -913,7 +913,7 @@ def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): meta = hit.fields() yield meta - def search_meta_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): + def search_meta_page(self, q, idx_name: str = LATEST_REVS, pagenum=1, pagelen=10, **kw) -> Generator[MetaData]: """ Same as search_meta, but with paging support. """ @@ -931,7 +931,7 @@ def search_results_size(self, q, idx_name=ALL_REVS, **kw): with self.ix[idx_name].searcher() as searcher: return len(searcher.search(q, **kw)) - def documents(self, idx_name=LATEST_REVS, **kw): + def documents(self, idx_name: str = LATEST_REVS, **kw) -> Generator[Revision]: """ Yield Revisions matching the kw args. """ @@ -940,7 +940,7 @@ def documents(self, idx_name=LATEST_REVS, **kw): item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) yield item.get_revision(doc[REVID], doc=doc) - def _documents(self, idx_name=LATEST_REVS, **kw): + def _documents(self, idx_name: str = LATEST_REVS, **kw) -> Generator[Document]: """ Yield documents matching the kw args (internal use only). @@ -951,7 +951,7 @@ def _documents(self, idx_name=LATEST_REVS, **kw): # ends and the "with" is left to close the index files. yield from searcher.documents(**kw) - def document(self, idx_name=LATEST_REVS, **kw): + def document(self, idx_name: str = LATEST_REVS, **kw) -> Revision | None: """ Return a Revision matching the kw args. """ @@ -961,7 +961,7 @@ def document(self, idx_name=LATEST_REVS, **kw): item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) return item.get_revision(doc[REVID], doc=doc) - def _document(self, idx_name=LATEST_REVS, short=False, **kw): + def _document(self, idx_name: str = LATEST_REVS, short=False, **kw) -> Document | None: """ Return a document matching the kw args (internal use only). 
""" @@ -970,7 +970,7 @@ def _document(self, idx_name=LATEST_REVS, short=False, **kw): with self.ix[idx_name].searcher() as searcher: return searcher.document(**kw) - def has_item(self, name: str): + def has_item(self, name: str) -> bool: if name.startswith("@itemid/"): item = Item(self, short=True, **{ITEMID: name[8:]}) else: @@ -978,7 +978,7 @@ def has_item(self, name: str): item = Item(self, short=True, **{NAME_EXACT: fqname.value, NAMESPACE: fqname.namespace}) return bool(item) - def __getitem__(self, name: str): + def __getitem__(self, name: str) -> Item: """ Return item with (may be a new or existing item). """ @@ -987,7 +987,7 @@ def __getitem__(self, name: str): fqname = split_fqname(name) return Item(self, **{NAME_EXACT: fqname.value, NAMESPACE: fqname.namespace}) - def get_item(self, short=False, **query): + def get_item(self, short=False, **query) -> Item: """ Return item identified by the query (may be a new or existing item). @@ -996,7 +996,7 @@ def get_item(self, short=False, **query): """ return Item(self, short=short, **query) - def create_item(self, **query): + def create_item(self, **query) -> Item: """ Return item identified by the query (must be a new item). 
@@ -1062,7 +1062,7 @@ def name(self) -> str | None: def namespace(self): return self.meta.get(NAMESPACE, "") - def _fqname(self, name=None): + def _fqname(self, name=None) -> CompositeName: """ return the fully qualified name including the namespace: NS:NAME """ @@ -1089,7 +1089,7 @@ def fqnames(self) -> list[CompositeName]: return [self.fqname] @property - def parentnames(self): + def parentnames(self) -> set[str]: """ Return list of parent names (same order as in names, but no dupes) @@ -1098,7 +1098,7 @@ def parentnames(self): return parent_names(self.names) @property - def fqparentnames(self): + def fqparentnames(self) -> list[CompositeName]: """ return the fully qualified parent names including the namespace: NS:NAME """ @@ -1109,23 +1109,24 @@ def acl(self): return self.meta.get(ACL) @property - def ptime(self): + def ptime(self) -> int | None: dt = self.meta.get(PTIME) if dt is not None: return utctimestamp(dt) @property - def names(self): + def names(self) -> list[str]: return self.meta[NAME] @property - def mtime(self): + def mtime(self) -> int | None: dt = self.meta.get(MTIME) if dt is not None: return utctimestamp(dt) class Item(PropertiesMixin): + def __init__(self, indexer: IndexingMiddleware, latest_doc=None, short=False, **query): """ :param indexer: indexer middleware instance @@ -1166,7 +1167,7 @@ def meta(self): return self._current @property - def parentids(self): + def parentids(self) -> set[str]: """ compute list of parent itemids @@ -1215,13 +1216,13 @@ def iter_revs(self): if self: yield from self.indexer.documents(idx_name=ALL_REVS, itemid=self.itemid) - def __getitem__(self, revid): + def __getitem__(self, revid: str) -> Revision: """ Get Revision with revision id . 
""" return Revision(self, revid) - def get_revision(self, revid, doc=None): + def get_revision(self, revid: str, doc: Document | None = None) -> Revision: """ Similar to item[revid], but you can optionally give an already existing whoosh result document for the given revid to avoid backend accesses for some use cases. @@ -1433,7 +1434,9 @@ class Revision(PropertiesMixin): An existing revision (exists in the backend). """ - def __init__(self, item: Item, revid: str, doc=None, name=None, retry=False): + def __init__( + self, item: Item, revid: str, doc: Document | None = None, name: str | None = None, retry: bool = False + ): is_current = revid == CURRENT if doc is None: if is_current: @@ -1466,7 +1469,7 @@ def _load(self): return meta, data @property - def data(self): + def data(self) -> ItemData: if self._data is None: self._load() return self._data @@ -1495,7 +1498,8 @@ def __repr__(self): class Meta(Mapping): - def __init__(self, revision, doc, meta: MetaData | None = None) -> None: + + def __init__(self, revision: Revision, doc: Document, meta: MetaData | None = None) -> None: self.revision = revision self._doc = doc or {} self._meta = meta or {} diff --git a/src/moin/storage/middleware/protecting.py b/src/moin/storage/middleware/protecting.py index 0e9a17f19..c1c75eb66 100644 --- a/src/moin/storage/middleware/protecting.py +++ b/src/moin/storage/middleware/protecting.py @@ -14,23 +14,21 @@ from __future__ import annotations +from typing import Any, Generator, TYPE_CHECKING + import time from whoosh.util.cache import lfu_cache -from moin.constants.rights import CREATE, READ, PUBREAD, WRITE, ADMIN, DESTROY, ACL_RIGHTS_CONTENTS +from moin import log from moin.constants.keys import ACL, ALL_REVS, LATEST_REVS, ITEMID, FQNAMES, NAME, NAMESPACE from moin.constants.namespaces import NAMESPACE_ALL - +from moin.constants.rights import CREATE, READ, PUBREAD, WRITE, ADMIN, DESTROY, ACL_RIGHTS_CONTENTS from moin.security import AccessControlList from 
moin.storage.middleware.exceptions import AccessDenied from moin.storage.types import ItemData, MetaData from moin.utils import close_file -from moin.utils.names import gen_fqnames, parent_names, split_fqname - -from moin import log - -from typing import Any, TYPE_CHECKING +from moin.utils.names import CompositeName, gen_fqnames, parent_names, split_fqname if TYPE_CHECKING: from moin.config import AclConfig @@ -47,7 +45,7 @@ ACL_CACHE = 600 # avoid ACL recalculation: user, namespace, ACL, parents, right > True/false -def pchecker(right, allowed, item): +def pchecker(right: str, allowed: bool, item: Item) -> bool: """Check blog entry publication date.""" if allowed and right == PUBREAD: # PUBREAD permission is only granted after publication time (ptime) @@ -87,7 +85,7 @@ def __init__(self, indexer: IndexingMiddleware, user: User, acl_mapping: list[tu lfu_cache_decorator = lfu_cache(ACL_CACHE) self.allows = lfu_cache_decorator(self._allows) # placeholder to show we are passing meta data around without affecting lfu caches - self.meta = None + self.meta: MetaData | None = None def _clear_acl_cache(self): # if we have modified the backend somehow so ACL lookup is influenced, @@ -95,7 +93,7 @@ def _clear_acl_cache(self): # ACL lookups afterwards will fetch fresh info from the lower layers. self.get_acls.cache_clear() - def _get_configured_acls(self, fqname): + def _get_configured_acls(self, fqname: CompositeName): """ for a fully-qualified itemname (namespace:name), get the acl configuration for that (part of the) namespace. @@ -112,7 +110,7 @@ def _get_configured_acls(self, fqname): return {"default": "All:", "hierarchic": False, "after": "", "before": ""} raise ValueError(f"No acl_mapping entry found for item {fqname!r}") - def _get_acls(self, itemid=None, fqname=None): + def _get_acls(self, itemid: str | None = None, fqname: CompositeName | None = None): """ return a list of (alternatively valid) effective acls for the item identified via itemid or fqname. 
@@ -148,7 +146,7 @@ def _get_acls(self, itemid=None, fqname=None): acl = self.meta[ACL] return [acl] - item = None + item: ProtectedItem | None = None if not meta_available or self._get_configured_acls(fqname)["hierarchic"]: """self.meta is not valid or namespace uses hierarchic acls and we need item parentids""" item = self.get_item(short=True, **q) @@ -175,22 +173,22 @@ def _eval_acl(self, acl, default_acl, user_name, right): aclobj = self.parse_acl(acl, default_acl) return aclobj.may(user_name, right) - def query_parser(self, default_fields, idx_name=LATEST_REVS): + def query_parser(self, default_fields: list[str], idx_name: str = LATEST_REVS): return self.indexer.query_parser(default_fields, idx_name=idx_name) - def search(self, q, idx_name=LATEST_REVS, **kw): + def search(self, q, idx_name: str = LATEST_REVS, **kw) -> Generator[ProtectedRevision]: for rev in self.indexer.search(q, idx_name, **kw): rev = ProtectedRevision(self, rev) if rev.allows(READ) or rev.allows(PUBREAD): yield rev - def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): + def search_page(self, q, idx_name: str = LATEST_REVS, pagenum=1, pagelen=10, **kw) -> Generator[ProtectedRevision]: for rev in self.indexer.search_page(q, idx_name, pagenum, pagelen, **kw): rev = ProtectedRevision(self, rev) if rev.allows(READ) or rev.allows(PUBREAD): yield rev - def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): + def search_meta(self, q, idx_name: str = LATEST_REVS, regex=None, **kw): """ Yield an item's metadata, skipping any items where read permission is denied. @@ -204,7 +202,7 @@ def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): if result: yield meta - def may_read_rev(self, meta): + def may_read_rev(self, meta: MetaData) -> bool: """ Return true if user may read item revision represented by whoosh index hit. Called by ajaxsearch template, others. 
@@ -231,7 +229,7 @@ def full_acls(self): item_acl = acl_cfg["default"] yield " ".join([before_acl, item_acl, after_acl]) - def _allows(self, user_names, acls, parentnames, namespace, right): + def _allows(self, user_names, acls, parentnames, namespace, right) -> bool: """ Check if usernames may have access on this item. @@ -326,7 +324,8 @@ def may(self, fqname, capability, usernames=None, item=None): class ProtectedItem: - def __init__(self, protector: ProtectingMiddleware, item: Item): + + def __init__(self, protector: ProtectingMiddleware, item: Item) -> None: """ :param protector: protector middleware :param item: item to protect @@ -426,7 +425,7 @@ def __getitem__(self, revid): rev = self.item[revid] return ProtectedRevision(self.protector, rev, p_item=self) - def get_revision(self, revid): + def get_revision(self, revid: str): return self[revid] def store_revision( @@ -436,9 +435,9 @@ def store_revision( overwrite: bool = False, return_rev: bool = False, return_meta: bool = False, - fqname=None, - **kw, - ) -> tuple[Any, dict[str, Any]] | ProtectedRevision | None: + fqname: CompositeName | None = None, + **kw: Any, + ) -> tuple[CompositeName, MetaData] | ProtectedRevision | None: self.require(WRITE) if not self: self.require(CREATE) @@ -475,7 +474,7 @@ def destroy_all_revisions(self): class ProtectedRevision: - def __init__(self, protector, rev: Revision, p_item: ProtectedItem | None = None) -> None: + def __init__(self, protector: ProtectingMiddleware, rev: Revision, p_item: ProtectedItem | None = None) -> None: """ :param protector: Protector middleware :param rev: Revision to protect diff --git a/src/moin/storage/middleware/routing.py b/src/moin/storage/middleware/routing.py index 6ee71bd94..ed6e7b9bc 100644 --- a/src/moin/storage/middleware/routing.py +++ b/src/moin/storage/middleware/routing.py @@ -11,13 +11,13 @@ from __future__ import annotations from moin.constants.keys import NAME, BACKENDNAME, NAMESPACE +from moin.storage.types import ItemData, 
MetaData -from moin.storage.backends import MutableBackendBase - -from typing import TYPE_CHECKING +from typing import Iterator, TYPE_CHECKING if TYPE_CHECKING: from moin.config import BackendMapping, NamespaceMapping + from typing_extensions import Self class Backend: @@ -61,7 +61,7 @@ def __init__(self, namespaces: NamespaceMapping, backends: BackendMapping): assert backend_name in backends @classmethod - def from_uri(cls, uri): + def from_uri(cls, uri: str) -> Self: """ Create an instance using the data given in the URI. """ @@ -75,7 +75,7 @@ def close(self): for backend in self.backends.values(): backend.close() - def _get_backend(self, fq_names: list[str]): + def _get_backend(self, fq_names: list[str]) -> tuple[str, list[str], str]: """ For a given fully qualified item name (e.g., "ns:itemname"), find the backend it belongs to, the item name without the namespace @@ -91,14 +91,14 @@ def _get_backend(self, fq_names: list[str]): return backend_name, item_names, namespace.rstrip(":") raise AssertionError(f"No backend found for {fq_name!r}. Namespaces: {self.namespaces!r}") - def __iter__(self): + def __iter__(self) -> Iterator[tuple[str, str]]: # Note: yields enough information so we can retrieve the revision from # the right backend later (this is more than just the revid). for backend_name, backend in self.backends.items(): for revid in backend: # TODO maybe directly yield the backend? 
yield (backend_name, revid) - def retrieve(self, backend_name: str, revid): + def retrieve(self, backend_name: str, revid: str) -> tuple[MetaData, ItemData]: backend = self.backends[backend_name] meta, data = backend.retrieve(revid) return meta, data @@ -106,15 +106,15 @@ def retrieve(self, backend_name: str, revid): # writing part def create(self): for backend in self.backends.values(): - if isinstance(backend, MutableBackendBase): + if not backend.read_only: backend.create() def destroy(self): for backend in self.backends.values(): - if isinstance(backend, MutableBackendBase): + if not backend.read_only: backend.destroy() - def store(self, meta, data): + def store(self, meta, data) -> tuple[str, str]: namespace = meta.get(NAMESPACE) if namespace is None: # if there is no NAMESPACE in metadata, we assume that the NAME @@ -134,9 +134,6 @@ def store(self, meta, data): backend_name, _, _ = self._get_backend([namespace]) backend = self.backends[backend_name] - if not isinstance(backend, MutableBackendBase): - raise TypeError(f"backend {backend_name} is read-only!") - revid = backend.store(meta, data) # add the BACKENDNAME after storing, so it gets only into @@ -146,6 +143,4 @@ def store(self, meta, data): def remove(self, backend_name, revid, destroy_data): backend = self.backends[backend_name] - if not isinstance(backend, MutableBackendBase): - raise TypeError(f"backend {backend_name} is read-only") backend.remove(revid, destroy_data) diff --git a/src/moin/storage/stores/__init__.py b/src/moin/storage/stores/__init__.py index 6630c895f..ddb71bc0a 100644 --- a/src/moin/storage/stores/__init__.py +++ b/src/moin/storage/stores/__init__.py @@ -9,19 +9,28 @@ you can likely implement it by adding very little and rather easy code. 
""" +from __future__ import annotations + +from typing import BinaryIO, Iterator, TypeAlias, TypeVar +from typing_extensions import Self + from abc import abstractmethod -from collections.abc import Mapping, MutableMapping -from io import BytesIO +from collections.abc import MutableMapping + +BinaryData = bytes | bytearray | memoryview + +_StoreValueT = TypeVar("_StoreValueT") -class StoreBase(Mapping): + +class StoreBase(MutableMapping[str, _StoreValueT]): """ - A simple read-only key/value store. + A simple read/write key/value store. """ @classmethod @abstractmethod - def from_uri(cls, uri): + def from_uri(cls: type[Self], uri: str) -> Self: """ Return an instance constructed from the given URI. """ @@ -32,128 +41,66 @@ def __init__(self, **kw): whatever we need for open(), create(), etc. """ - def open(self): + def create(self) -> None: """ - Open the store; prepare it for usage. + Create an empty store. """ - def close(self): + def destroy(self) -> None: """ - Close the store; stop using it; free resources (except stored data). + Destroy the store (erase all stored data, remove store). """ - @abstractmethod - def __iter__(self): + def open(self) -> None: """ - Iterate over keys present in the store. + Open the store; prepare it for usage. """ - def __len__(self): - return len([key for key in self]) - - @abstractmethod - def __getitem__(self, key): + def close(self) -> None: """ - Return data stored for key. + Close the store; stop using it; free resources (except stored data). """ - -class BytesStoreBase(StoreBase): @abstractmethod - def __getitem__(self, key): + def __iter__(self) -> Iterator[str]: """ - Return a byte string for key if it exists; otherwise raise KeyError. + Iterate over keys present in the store. """ + def __len__(self) -> int: + return len([key for key in self]) -class FileStoreBase(StoreBase): @abstractmethod - def __getitem__(self, key): - """ - Return a file-like object for key if it exists; otherwise raise KeyError. 
- - Note: The caller is responsible for closing the open file we return - after usage. - """ - - -class MutableStoreBase(StoreBase, MutableMapping): - """ - A simple read/write key/value store. - """ - - def create(self): + def __getitem__(self, key: str) -> _StoreValueT: """ - Create an empty store. - """ - - def destroy(self): - """ - Destroy the store (erase all stored data, remove store). + Return data stored for key. """ + raise KeyError @abstractmethod - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: _StoreValueT) -> None: """ Store value under key. """ + raise KeyError @abstractmethod - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: """ Delete the key, dereference the related value in the store. """ + raise KeyError -class BytesMutableStoreBase(MutableStoreBase): - @abstractmethod - def __setitem__(self, key, value): - """ - Store a bytestring for key. - """ - - -class BytesMutableStoreMixin: - """ - Mix this into a FileMutableStore to get a BytesMutableStore, like shown here: - - class BytesStore(BytesMutableStoreMixin, FileStore, BytesMutableStoreBase): - # That’s all; nothing more needed. - """ - - def __getitem__(self, key): - with super().__getitem__(key) as stream: - return stream.read() - - def __setitem__(self, key, value): - with BytesIO(value) as stream: - super().__setitem__(key, stream) - - -class FileMutableStoreBase(MutableStoreBase): - @abstractmethod - def __setitem__(self, key, stream): - """ - Store a file-like object for key. - - Note: caller is responsible for giving us an open file and also for - closing that file later. The caller must not rely on any specific - file pointer position after we return. - """ - +BytesStoreBase: TypeAlias = StoreBase[BinaryData] +""" +A store dealing with binary data. 
+""" -class FileMutableStoreMixin: - """ - Mix this into a BytesMutableStore to get a FileMutableStore, like shown here: - class FileStore(FileMutableStoreMixin, BytesStore, FileMutableStoreBase) - # That’s all; nothing more needed. - """ - - def __getitem__(self, key): - value = super().__getitem__(key) - return BytesIO(value) +FileStoreBase: TypeAlias = StoreBase[BinaryIO] +""" +A store dealing with file-like objects. - def __setitem__(self, key, stream): - value = stream.read() - super().__setitem__(key, value) +Note: The caller is responsible for closing the opened files. +""" diff --git a/src/moin/storage/stores/_tests/conftest.py b/src/moin/storage/stores/_tests/conftest.py index 5fb123da7..cc8fdf50e 100644 --- a/src/moin/storage/stores/_tests/conftest.py +++ b/src/moin/storage/stores/_tests/conftest.py @@ -6,7 +6,7 @@ """ import pytest -from ..wrappers import ByteToStreamWrappingStore +from .wrappers import ByteToStreamWrappingStore STORES_PACKAGE = "moin.storage.stores" diff --git a/src/moin/storage/stores/wrappers.py b/src/moin/storage/stores/_tests/wrappers.py similarity index 100% rename from src/moin/storage/stores/wrappers.py rename to src/moin/storage/stores/_tests/wrappers.py diff --git a/src/moin/storage/stores/fs.py b/src/moin/storage/stores/fs.py index 78c6d0566..72e8fea83 100644 --- a/src/moin/storage/stores/fs.py +++ b/src/moin/storage/stores/fs.py @@ -7,51 +7,52 @@ Store in the file system, one file per key/value pair. """ +from __future__ import annotations + +from typing import BinaryIO, Iterator +from typing_extensions import Self + import os import errno import shutil -from . import BytesMutableStoreBase, FileMutableStoreBase, BytesMutableStoreMixin +from io import BytesIO +from . import BinaryData, BytesStoreBase, FileStoreBase -class FileStore(FileMutableStoreBase): - """ - A simple filesystem-based store. - Keys are required to be valid filenames. 
- """ +class FileStoreMixin: @classmethod - def from_uri(cls, uri): + def from_uri(cls: type[Self], uri: str) -> Self: return cls(uri) - def __init__(self, path): + def __init__(self, path: str) -> None: """ :param path: Base directory used for this store. """ self.path = path - def create(self): + def create(self) -> None: try: os.makedirs(self.path) except OSError as e: if e.errno != errno.EEXIST: raise - def destroy(self): + def destroy(self) -> None: shutil.rmtree(self.path) - def _mkpath(self, key): - # XXX unsafe keys? + def _mkpath(self, key: str) -> str: return os.path.join(self.path, key) - def __iter__(self): + def __iter__(self) -> Iterator[str]: yield from os.listdir(self.path) - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: os.remove(self._mkpath(key)) - def __getitem__(self, key): + def _getitem(self, key: str) -> BinaryIO: try: return open(self._mkpath(key), "rb") except OSError as e: @@ -59,11 +60,35 @@ def __getitem__(self, key): raise KeyError(key) raise - def __setitem__(self, key, stream): + def _setitem(self, key: str, stream: BinaryIO) -> None: with open(self._mkpath(key), "wb") as f: blocksize = 64 * 1024 shutil.copyfileobj(stream, f, blocksize) -class BytesStore(BytesMutableStoreMixin, FileStore, BytesMutableStoreBase): - """Filesystem BytesStore.""" +class FileStore(FileStoreMixin, FileStoreBase): + """ + A simple filesystem-based store. + + Keys are required to be valid filenames. + """ + + def __getitem__(self, key: str) -> BinaryIO: + return self._getitem(key) + + def __setitem__(self, key: str, stream: BinaryIO) -> None: + self._setitem(key, stream) + + +class BytesStore(FileStoreMixin, BytesStoreBase): + """ + Filesystem BytesStore. 
+ """ + + def __getitem__(self, key: str) -> BinaryData: + with self._getitem(key) as stream: + return stream.read() + + def __setitem__(self, key: str, value: BinaryData) -> None: + with BytesIO(value) as stream: + self._setitem(key, stream) diff --git a/src/moin/storage/stores/memory.py b/src/moin/storage/stores/memory.py index e1a91d874..f6dfb2b68 100644 --- a/src/moin/storage/stores/memory.py +++ b/src/moin/storage/stores/memory.py @@ -9,46 +9,88 @@ Note: likely this is mostly useful for unit tests. """ -from . import BytesMutableStoreBase, FileMutableStoreBase, FileMutableStoreMixin +from typing import BinaryIO, Iterator +from typing_extensions import override, Self +from io import BytesIO +from . import BinaryData, BytesStoreBase, FileStoreBase -class BytesStore(BytesMutableStoreBase): + +class MemoryStoreMixin: """ A simple dict-based in-memory store. No persistence! """ @classmethod - def from_uri(cls, uri): + def from_uri(cls: type[Self], uri: str) -> Self: return cls() - def __init__(self): - self._st = None - self.__st = None + def __init__(self) -> None: + self._st: dict[str, BinaryData] | None = None + self.__st: dict[str, BinaryData] | None = None - def create(self): + def create(self) -> None: self.__st = {} - def destroy(self): + def destroy(self) -> None: + if self._st is not None: + self.close() self.__st = None - def open(self): + def open(self) -> None: + if self.__st is None: + raise ValueError("I/O operation on non-existing store.") self._st = self.__st - def close(self): + def close(self) -> None: self._st = None - def __iter__(self): + def __iter__(self) -> Iterator: + if self._st is None: + raise yield from self._st - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: + if self._st is None: + raise ValueError("I/O operation on closed store.") del self._st[key] - def __getitem__(self, key): + def _getitem(self, key: str) -> BinaryData: + if self._st is None: + raise ValueError("I/O operation on closed store.") return 
self._st[key] - def __setitem__(self, key, value): + def _setitem(self, key: str, value: BinaryData) -> None: + if self._st is None: + raise ValueError("I/O operation on closed store.") self._st[key] = value -class FileStore(FileMutableStoreMixin, BytesStore, FileMutableStoreBase): - """In-memory FileStore.""" +class BytesStore(MemoryStoreMixin, BytesStoreBase): + """ + A simple dict-based in-memory store. No persistence! + """ + + @override + def __getitem__(self, key: str) -> BinaryData: + return self._getitem(key) + + @override + def __setitem__(self, key: str, value: BinaryData) -> None: + self._setitem(key, value) + + +class FileStore(MemoryStoreMixin, FileStoreBase): + """ + In-memory FileStore. + """ + + @override + def __getitem__(self, key: str) -> BinaryIO: + value = self._getitem(key) + return BytesIO(value) + + @override + def __setitem__(self, key: str, stream: BinaryIO) -> None: + value = stream.read() + self._setitem(key, value) diff --git a/src/moin/storage/stores/sqla.py b/src/moin/storage/stores/sqla.py index 77d7a8286..902442d3a 100644 --- a/src/moin/storage/stores/sqla.py +++ b/src/moin/storage/stores/sqla.py @@ -7,38 +7,53 @@ Stores key/value pairs into any database supported by SQLAlchemy. """ +from __future__ import annotations + import os +from typing import Any, BinaryIO, TYPE_CHECKING +from typing_extensions import override, Self + +from io import BytesIO + from sqlalchemy import create_engine, select, MetaData, Table, Column, String, LargeBinary from sqlalchemy.pool import StaticPool from sqlalchemy.exc import IntegrityError from moin.constants.namespaces import NAMESPACE_USERPROFILES -from . import BytesMutableStoreBase, FileMutableStoreBase, FileMutableStoreMixin +from moin.storage.error import StorageError + +from . 
import BytesStoreBase, FileStoreBase + +if TYPE_CHECKING: + from sqlalchemy import Engine KEY_LEN = 128 VALUE_LEN = 1024 * 1024 # 1MB binary data -class BytesStore(BytesMutableStoreBase): - """ - A simple dict-based in-memory store. No persistence! - """ +class SQLAlchemyStoreMixin: @classmethod - def from_uri(cls, uri): + def from_uri(cls, uri: str) -> Self: """ Create a new cls instance from the given URI. :param cls: Class to create. :param uri: The database URI that we pass on to SQLAlchemy. """ - # using "::" to support windows pathnames that - # may include ":" after the drive letter. + # using "::" to support windows pathnames that may include ":" after the drive letter. params = uri.split("::") - return cls(*params) - - def __init__(self, db_uri=None, table_name="store", verbose=False): + kwargs: dict[str, Any] = {} + if len(params) > 0: + kwargs["db_uri"] = params[0] + if len(params) > 1: + kwargs["table_name"] = params[1] + if len(params) > 2: + kwargs["verbose"] = params[2].lower() == "true" + return cls(**kwargs) + + def __init__(self, db_uri: str | None = None, table_name: str = "store", verbose: bool = False) -> None: """ :param db_uri: The database URI that we pass on to SQLAlchemy. May contain user/password/host/port/etc. 
@@ -47,17 +62,26 @@ def __init__(self, db_uri=None, table_name="store", verbose=False): """ self.db_uri = db_uri self.verbose = verbose - self.engine = None - self.table = None + self.engine: Engine | None = None + self.table: Table | None = None self.table_name = table_name - if db_uri.startswith("sqlite:///"): + self._make_dirs() + + def _make_dirs(self) -> None: + if self.db_uri is None: + return + if self.db_uri.startswith("sqlite:///"): db_path = os.path.dirname(self.db_uri.split("sqlite:///")[1]) if db_path and not os.path.exists(db_path): os.makedirs(db_path) - def open(self): - db_uri = self.db_uri - if db_uri is None: + def _engine(self) -> Engine: + if self.engine is None: + raise StorageError("SQLAlchemy store: no engine") + return self.engine + + def open(self) -> None: + if (db_uri := self.db_uri) is None: # These settings apply only for development/testing. The additional args are necessary # due to some limitations of the in-memory SQLite database. db_uri = "sqlite:///:memory:" @@ -73,45 +97,47 @@ def open(self): Column("value", LargeBinary(VALUE_LEN)), ) - def close(self): - self.engine.dispose() + def close(self) -> None: + if self.engine is not None: + self.engine.dispose() + self.engine = None self.table = None - def create(self): + def create(self) -> None: self.open() - with self.engine.connect() as conn: + with self._engine().connect() as conn: with conn.begin(): self.metadata.create_all(conn) self.close() def destroy(self): self.open() - with self.engine.connect() as conn: + with self._engine().connect() as conn: with conn.begin(): self.metadata.drop_all(conn) self.close() def __iter__(self): - with self.engine.connect() as conn: + with self._engine().connect() as conn: rows = conn.execute(select(self.table.c.key)) for row in rows: yield row[0] def __delitem__(self, key): - with self.engine.connect() as conn: + with self._engine().connect() as conn: with conn.begin(): conn.execute(self.table.delete().where(self.table.c.key == key)) - def 
__getitem__(self, key):
-        with self.engine.connect() as conn:
+    def _getitem(self, key):
+        with self._engine().connect() as conn:
             value = conn.execute(select(self.table.c.value).where(self.table.c.key == key)).fetchone()
             if value is not None:
                 return value[0]
             else:
                 raise KeyError(key)
 
-    def __setitem__(self, key, value):
-        with self.engine.connect() as conn:
+    def _setitem(self, key, value):
+        with self._engine().connect() as conn:
             with conn.begin():
                 try:
                     conn.execute(self.table.insert().values(key=key, value=value))
@@ -123,5 +149,29 @@ def __setitem__(self, key, value):
                     raise
 
 
-class FileStore(FileMutableStoreMixin, BytesStore, FileMutableStoreBase):
-    """SQLAlchemy FileStore."""
+class BytesStore(SQLAlchemyStoreMixin, BytesStoreBase):
+    """
+    A simple SQLAlchemy-based store.
+    """
+
+    def __getitem__(self, key):
+        return self._getitem(key)
+
+    def __setitem__(self, key, value):
+        self._setitem(key, value)
+
+
+class FileStore(SQLAlchemyStoreMixin, FileStoreBase):
+    """
+    SQLAlchemy FileStore.
+    """
+
+    @override
+    def __getitem__(self, key: str) -> BinaryIO:
+        value = self._getitem(key)
+        return BytesIO(value)
+
+    @override
+    def __setitem__(self, key: str, stream: BinaryIO) -> None:
+        value = stream.read()
+        self._setitem(key, value)
diff --git a/src/moin/storage/stores/sqlite.py b/src/moin/storage/stores/sqlite.py
index 5ca822bbe..4a1e42ebc 100644
--- a/src/moin/storage/stores/sqlite.py
+++ b/src/moin/storage/stores/sqlite.py
@@ -10,22 +10,26 @@
 Optionally, zlib ("gzip") compression can be used.
 """
 
+from __future__ import annotations
+
+from typing import Any, BinaryIO, Iterator
+from typing_extensions import override, Self
+
 import os
 import base64
 import zlib
+
+from io import BytesIO
 from sqlite3 import connect, Row, IntegrityError
 
 from moin.constants.namespaces import NAMESPACE_USERPROFILES
 
-from . import BytesMutableStoreBase, FileMutableStoreBase, FileMutableStoreMixin
+from . 
import BytesStoreBase, FileStoreBase -class BytesStore(BytesMutableStoreBase): - """ - A simple SQLite-based store. - """ +class SqliteStoreMixin: @classmethod - def from_uri(cls, uri): + def from_uri(cls: type[Self], uri: str) -> Self: """ Create a new cls instance from the given URI. @@ -37,11 +41,16 @@ def from_uri(cls, uri): # Using "::" to support Windows pathnames that # may include ":" after the drive letter. params = uri.split("::") - if len(params) == 3: - params[2] = int(params[2]) - return cls(*params) - - def __init__(self, db_name, table_name="store", compression_level=0): + kwargs: dict[str, Any] = {} + if len(params) > 0: + kwargs["db_name"] = params[0] + if len(params) > 1: + kwargs["table_name"] = params[1] + if len(params) > 2: + kwargs["compression_level"] = int(params[2]) + return cls(**kwargs) + + def __init__(self, db_name: str, table_name: str = "store", compression_level: int = 0) -> None: """ Just store the params. @@ -60,32 +69,32 @@ def __init__(self, db_name, table_name="store", compression_level=0): if not os.path.exists(db_path): os.makedirs(db_path) - def create(self): + def create(self) -> None: conn = connect(self.db_name) with conn: conn.execute(f"create table {self.table_name} (key text primary key, value blob)") - def destroy(self): + def destroy(self) -> None: conn = connect(self.db_name) with conn: conn.execute(f"drop table {self.table_name}") - def open(self): + def open(self) -> None: self.conn = connect(self.db_name, check_same_thread=False) self.conn.row_factory = Row # make column access by ['colname'] possible - def close(self): + def close(self) -> None: pass - def __iter__(self): + def __iter__(self) -> Iterator[str]: for row in self.conn.execute(f"select key from {self.table_name}"): yield row["key"] - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: with self.conn: self.conn.execute(f"delete from {self.table_name} where key=?", (key,)) - def _compress(self, value): + def _compress(self, value: 
bytes) -> bytes: if self.compression_level: value = zlib.compress(value, self.compression_level) # We store magic start/end markers and the compression level, @@ -94,7 +103,7 @@ def _compress(self, value): tail = "}}}" return header.encode() + value + tail.encode() - def _decompress(self, value): + def _decompress(self, value: bytes) -> bytes: if not (value[:5] == b"{{{GZ" and value[-3:] == b"}}}"): print("value = %r ... %r" % (value[:5], value[-3:])) raise ValueError("Invalid data format in database.") @@ -104,29 +113,55 @@ def _decompress(self, value): value = zlib.decompress(value) return value - def __getitem__(self, key): + def _getitem(self, key: str) -> bytes: rows = list(self.conn.execute(f"select value from {self.table_name} where key=?", (key,))) if not rows: raise KeyError(key) - value = str(rows[0]["value"]) # a string in base64 encoding - value = base64.b64decode(value.encode()) - return self._decompress(value) + base64_value = str(rows[0]["value"]) # a string in base64 encoding + compressed = base64.b64decode(base64_value.encode()) + return self._decompress(compressed) - def __setitem__(self, key, value): - value = self._compress(value) + def _setitem(self, key: str, value: bytes) -> None: + compressed = self._compress(value) with self.conn: - value = base64.b64encode(value).decode() # a string in base64 encoding + base64_value = base64.b64encode(compressed).decode() # a string in base64 encoding try: - self.conn.execute(f"insert into {self.table_name} values (?, ?)", (key, value)) + self.conn.execute(f"insert into {self.table_name} values (?, ?)", (key, base64_value)) except IntegrityError: if NAMESPACE_USERPROFILES in self.db_name: # The userprofiles namespace does not support revisions, so we update the existing row self.conn.execute( - 'UPDATE {0} SET value = "{2}" WHERE key = "{1}"'.format(self.table_name, key, value) + 'UPDATE {0} SET value = "{2}" WHERE key = "{1}"'.format(self.table_name, key, base64_value) ) else: raise -class 
FileStore(FileMutableStoreMixin, BytesStore, FileMutableStoreBase): - """SQLite FileStore.""" +class BytesStore(SqliteStoreMixin, BytesStoreBase): + """ + A simple SQLite-based store. + """ + + @override + def __getitem__(self, key: str) -> bytes: + return self._getitem(key) + + @override + def __setitem__(self, key: str, value: bytes) -> None: + self._setitem(key, value) + + +class FileStore(SqliteStoreMixin, FileStoreBase): + """ + SQLite FileStore. + """ + + @override + def __getitem__(self, key: str) -> BinaryIO: + value = self._getitem(key) + return BytesIO(value) + + @override + def __setitem__(self, key: str, stream: BinaryIO) -> None: + value = stream.read() + self._setitem(key, value)