-
Notifications
You must be signed in to change notification settings - Fork 280
Advisory locking for metadata and artifacts files #2861
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
53cc81b
eeb59f8
6d66696
7a8edd9
b63cf71
ba3adef
cbe34d9
ba0842f
5f467bb
55dbb53
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import sys | ||
import time | ||
|
||
from tuf.ngclient import Updater | ||
|
||
print(f"Fetching metadata {sys.argv[1]} times:") | ||
print(f" metadata dir: {sys.argv[2]}") | ||
print(f" metadata url: {sys.argv[3]}") | ||
|
||
start = time.time() | ||
|
||
for i in range(int(sys.argv[1])): | ||
try: | ||
refresh_start = time.time() | ||
u = Updater(metadata_dir=sys.argv[2], metadata_base_url=sys.argv[3]) | ||
# file3.txt is delegated so we end up exercising all metadata load paths | ||
u.get_targetinfo("file3.txt") | ||
except OSError as e: | ||
print( | ||
f"Failed on iteration {i}, " | ||
f"{time.time() - refresh_start} secs elapsed ({time.time() - start} total)" | ||
) | ||
raise e |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import logging | ||
import time | ||
from collections.abc import Iterator | ||
from contextlib import contextmanager | ||
from typing import IO | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
try: | ||
# advisory file locking for posix | ||
import fcntl | ||
|
||
@contextmanager | ||
def lock_file(path: str) -> Iterator[IO]: | ||
with open(path, "wb") as f: | ||
fcntl.lockf(f, fcntl.LOCK_EX) | ||
yield f | ||
|
||
except ModuleNotFoundError: | ||
# Windows file locking, in belt-and-suspenders-from-Temu style: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 😆 |
||
# Use a loop that tries to open the lockfile for 30 secs, but also | ||
# use msvcrt.locking(). | ||
# * since open() usually just fails when another process has the file open | ||
# msvcrt.locking() almost never gets called when there is a lock. open() | ||
# sometimes succeeds for multiple processes though | ||
# * msvcrt.locking() does not even block until file is available: it just | ||
# tries once per second in a non-blocking manner for 10 seconds. So if | ||
# another process keeps opening the file it's unlikely that we actually | ||
# get the lock | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this means the timeout could be anything between 10 and 40 seconds? Would it make sense to use |
||
import msvcrt | ||
|
||
@contextmanager | ||
def lock_file(path: str) -> Iterator[IO]: | ||
err = None | ||
locked = False | ||
for _ in range(100): | ||
try: | ||
with open(path, "wb") as f: | ||
msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you lock 1 byte, in an empty file? I guess you can. |
||
locked = True | ||
yield f | ||
return | ||
except FileNotFoundError: | ||
# could be from yield or from open() -- either way we bail | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are opening the file for wring. Should this ever lead to a FileNotFoundError?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can come from either :
|
||
raise | ||
except OSError as e: | ||
if locked: | ||
# yield has raised, let's not continue loop | ||
raise e | ||
err = e | ||
logger.warning("Unsuccessful lock attempt for %s: %s", path, e) | ||
time.sleep(0.3) | ||
|
||
# raise the last failure if we never got a lock | ||
if err is not None: | ||
raise err |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,10 +29,6 @@ | |
* ``Updater.download_target()`` downloads a target file and ensures it is | ||
verified correct by the metadata. | ||
|
||
Note that applications using ``Updater`` should be 'single instance' | ||
applications: running multiple instances that use the same cache directories at | ||
the same time is not supported. | ||
|
||
A simple example of using the Updater to implement a Python TUF client that | ||
downloads target files is available in `examples/client | ||
<https://github.com/theupdateframework/python-tuf/tree/develop/examples/client>`_. | ||
|
@@ -64,11 +60,14 @@ | |
|
||
from tuf.api import exceptions | ||
from tuf.api.metadata import Root, Snapshot, TargetFile, Targets, Timestamp | ||
from tuf.ngclient._internal.file_lock import lock_file | ||
from tuf.ngclient._internal.trusted_metadata_set import TrustedMetadataSet | ||
from tuf.ngclient.config import EnvelopeType, UpdaterConfig | ||
from tuf.ngclient.urllib3_fetcher import Urllib3Fetcher | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Iterator | ||
|
||
from tuf.ngclient.fetcher import FetcherInterface | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -131,16 +130,30 @@ def __init__( | |
f"got '{self.config.envelope_type}'" | ||
) | ||
|
||
if not bootstrap: | ||
# if no root was provided, use the cached non-versioned root.json | ||
bootstrap = self._load_local_metadata(Root.type) | ||
# Ensure the whole metadata directory structure exists | ||
rootdir = Path(self._dir, "root_history") | ||
rootdir.mkdir(exist_ok=True, parents=True) | ||
|
||
# Load the initial root, make sure it's cached | ||
self._trusted_set = TrustedMetadataSet( | ||
bootstrap, self.config.envelope_type | ||
) | ||
self._persist_root(self._trusted_set.root.version, bootstrap) | ||
self._update_root_symlink() | ||
with self._lock_metadata(): | ||
if not bootstrap: | ||
# if no root was provided, use the cached non-versioned root | ||
bootstrap = self._load_local_metadata(Root.type) | ||
|
||
# Load the initial root, make sure it's cached | ||
self._trusted_set = TrustedMetadataSet( | ||
bootstrap, self.config.envelope_type | ||
) | ||
self._persist_root(self._trusted_set.root.version, bootstrap) | ||
self._update_root_symlink() | ||
|
||
@contextlib.contextmanager | ||
def _lock_metadata(self) -> Iterator[None]: | ||
"""Context manager for locking the metadata directory.""" | ||
|
||
logger.debug("Getting metadata lock...") | ||
with lock_file(os.path.join(self._dir, ".lock")): | ||
yield | ||
logger.debug("Released metadata lock") | ||
|
||
def refresh(self) -> None: | ||
"""Refresh top-level metadata. | ||
|
@@ -166,10 +179,11 @@ def refresh(self) -> None: | |
DownloadError: Download of a metadata file failed in some way | ||
""" | ||
|
||
self._load_root() | ||
self._load_timestamp() | ||
self._load_snapshot() | ||
self._load_targets(Targets.type, Root.type) | ||
with self._lock_metadata(): | ||
self._load_root() | ||
self._load_timestamp() | ||
self._load_snapshot() | ||
self._load_targets(Targets.type, Root.type) | ||
|
||
def _generate_target_file_path(self, targetinfo: TargetFile) -> str: | ||
if self.target_dir is None: | ||
|
@@ -205,9 +219,14 @@ def get_targetinfo(self, target_path: str) -> TargetFile | None: | |
``TargetFile`` instance or ``None``. | ||
""" | ||
|
||
if Targets.type not in self._trusted_set: | ||
self.refresh() | ||
return self._preorder_depth_first_walk(target_path) | ||
with self._lock_metadata(): | ||
if Targets.type not in self._trusted_set: | ||
# implicit refresh | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're not calling refresh, which has its own lock, to include pdf walk in the same lock, right? |
||
self._load_root() | ||
self._load_timestamp() | ||
self._load_snapshot() | ||
self._load_targets(Targets.type, Root.type) | ||
return self._preorder_depth_first_walk(target_path) | ||
|
||
def find_cached_target( | ||
self, | ||
|
@@ -295,7 +314,7 @@ def download_target( | |
targetinfo.verify_length_and_hashes(target_file) | ||
|
||
target_file.seek(0) | ||
with open(filepath, "wb") as destination_file: | ||
with lock_file(filepath) as destination_file: | ||
shutil.copyfileobj(target_file, destination_file) | ||
|
||
logger.debug("Downloaded target %s", targetinfo.path) | ||
|
@@ -335,7 +354,6 @@ def _persist_root(self, version: int, data: bytes) -> None: | |
"root_history/1.root.json"). | ||
""" | ||
rootdir = Path(self._dir, "root_history") | ||
rootdir.mkdir(exist_ok=True, parents=True) | ||
self._persist_file(str(rootdir / f"{version}.root.json"), data) | ||
|
||
def _persist_file(self, filename: str, data: bytes) -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It took me a while to realise that closing the file releases the lock (even after reading fcntl docs). Maybe this is obvious to others. But a brief comment about how this works might be helpful.