Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ For more examples, see the [example notebook here][example-notebook].
- `az:`, `adl:`, `abfs:` and `abfss:` Azure Storage _(requires `adlfs`)_
- `data:` RFC 2397 style data URLs _(requires `fsspec>=2023.12.2`)_
- `github:` GitHub repository filesystem
- `hf:` Hugging Face filesystem _(requires `huggingface_hub`)_
- `http:` and `https:` HTTP(S)-based filesystem
- `hdfs:` Hadoop distributed filesystem
- `gs:` and `gcs:` Google Cloud Storage _(requires `gcsfs`)_
Expand Down
7 changes: 7 additions & 0 deletions upath/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,13 @@ def __new__(
**_: Any,
) -> _uimpl.cloud.AzurePath: ...
# Typing-only overload: a literal ``protocol="hf"`` narrows the declared
# return type to ``HfPath``. The body is a stub (``...``).
@overload  # noqa: E301
def __new__(
    cls,
    *args: JoinablePathLike,
    protocol: Literal["hf"],
    **_: Any,
) -> _uimpl.cloud.HfPath: ...
@overload # noqa: E301
def __new__(
cls,
*args: JoinablePathLike,
Expand Down
17 changes: 17 additions & 0 deletions upath/implementations/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
from upath._chain import FSSpecChainParser
from upath.types.storage_options import AzureStorageOptions
from upath.types.storage_options import GCSStorageOptions
from upath.types.storage_options import HfStorageOptions
from upath.types.storage_options import S3StorageOptions

__all__ = [
"CloudPath",
"GCSPath",
"S3Path",
"AzurePath",
"HfPath",
]


Expand Down Expand Up @@ -157,3 +159,18 @@ def __init__(
)
if not self.drive and len(self.parts) > 1:
raise ValueError("non key-like path provided (bucket/container missing)")


class HfPath(CloudPath):
    """Cloud path for the Hugging Face filesystem (``hf`` protocol).

    The constructor adds no behavior of its own: every argument is handed
    unchanged to ``CloudPath.__init__``. It exists to give the ``protocol``
    and ``storage_options`` parameters their Hugging Face specific types.
    """

    __slots__ = ()

    def __init__(
        self,
        *args: JoinablePathLike,
        protocol: Literal["hf"] | None = None,
        chain_parser: FSSpecChainParser = DEFAULT_CHAIN_PARSER,
        **storage_options: Unpack[HfStorageOptions],
    ) -> None:
        """Initialize the path by delegating to ``CloudPath``.

        Args:
            *args: Path segments to join.
            protocol: ``"hf"`` or ``None``.
            chain_parser: Parser forwarded to the parent constructor.
            **storage_options: Options typed by ``HfStorageOptions``.
        """
        super().__init__(
            *args,
            protocol=protocol,
            chain_parser=chain_parser,
            **storage_options,
        )
1 change: 1 addition & 0 deletions upath/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class _Registry(MutableMapping[str, "type[upath.UPath]"]):
"gcs": "upath.implementations.cloud.GCSPath",
"gs": "upath.implementations.cloud.GCSPath",
"hdfs": "upath.implementations.hdfs.HDFSPath",
"hf": "upath.implementations.cloud.HfPath",
"http": "upath.implementations.http.HTTPPath",
"https": "upath.implementations.http.HTTPPath",
"memory": "upath.implementations.memory.MemoryPath",
Expand Down
96 changes: 96 additions & 0 deletions upath/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,3 +553,99 @@ def ssh_fixture(ssh_container, local_testdir, monkeypatch):
)
finally:
fs.delete("/app/testdir", recursive=True)


@pytest.fixture
def hf_test_repo():
    """Return the repository id used by the Hugging Face test fixtures."""
    # "__username__" is an invalid username so we can use it for tests
    repo_id = "__username__/test_repo"
    return repo_id


@pytest.fixture
def mock_hf_api(pathlib_base, monkeypatch, hf_test_repo):  # noqa: C901
    """Swap the ``HfApi`` used by ``hf_file_system`` for a local-directory mock.

    The mock serves exactly one repository, ``hf_test_repo``, whose content is
    the directory tree below ``pathlib_base``. Every other repo id raises
    ``RepositoryNotFoundError``. Tests using this fixture are skipped when
    ``huggingface_hub`` cannot be imported.
    """
    huggingface_hub = pytest.importorskip(
        "huggingface_hub", reason="hf tests require huggingface_hub"
    )
    hf_file_system = pytest.importorskip(
        "huggingface_hub.hf_file_system", reason="hf tests require huggingface_hub"
    )

    class MockedHfApi(huggingface_hub.HfApi):
        """``HfApi`` subclass answering from the local test directory."""

        def repo_info(self, repo_id, *args, repo_type=None, **kwargs):
            """Return an info object for the test repo, by ``repo_type``."""
            if repo_id != hf_test_repo:
                raise huggingface_hub.errors.RepositoryNotFoundError(repo_id)
            elif repo_type is None or repo_type == "model":
                return huggingface_hub.hf_api.ModelInfo(id=repo_id)
            elif repo_type == "dataset":
                return huggingface_hub.hf_api.DatasetInfo(id=repo_id)
            elif repo_type == "space":
                return huggingface_hub.hf_api.SpaceInfo(id=repo_id)
            else:
                raise ValueError("Unsupported repo type.")

        def get_paths_info(self, repo_id, paths, *args, **kwargs):
            """Return entries for those ``paths`` that exist locally.

            Empty paths and paths that are neither a file nor a directory
            contribute nothing to the result.
            """
            if repo_id != hf_test_repo:
                raise huggingface_hub.errors.RepositoryNotFoundError(repo_id)
            paths_info = []
            for path in paths:
                if not path:
                    # An empty entry has no local counterpart to inspect.
                    continue
                path = pathlib_base / path
                # Entries are built with ``oid=`` exactly as in
                # ``list_repo_tree`` so that both methods construct
                # RepoFile / RepoFolder the same way.
                if path.is_file():
                    paths_info.append(
                        huggingface_hub.hf_api.RepoFile(
                            path=path.relative_to(pathlib_base).as_posix(),
                            oid="oid",
                            size=path.stat().st_size,
                        )
                    )
                elif path.is_dir():
                    paths_info.append(
                        huggingface_hub.hf_api.RepoFolder(
                            path=path.relative_to(pathlib_base).as_posix(),
                            oid="oid",
                        )
                    )
            return paths_info

        def list_repo_tree(
            self, repo_id, path_in_repo, *args, recursive=False, **kwargs
        ):
            """Yield entries below ``path_in_repo`` (the whole tree if falsy)."""
            if repo_id != hf_test_repo:
                raise huggingface_hub.errors.RepositoryNotFoundError(repo_id)
            pathlib_dir = pathlib_base / path_in_repo if path_in_repo else pathlib_base
            for path in pathlib_dir.rglob("*") if recursive else pathlib_dir.glob("*"):
                if path.is_file():
                    yield huggingface_hub.hf_api.RepoFile(
                        path=path.relative_to(pathlib_base).as_posix(),
                        oid="oid",
                        size=path.stat().st_size,
                    )
                else:
                    yield huggingface_hub.hf_api.RepoFolder(
                        path=path.relative_to(pathlib_base).as_posix(),
                        oid="oid",
                    )

    # Drop cached filesystem instances before installing the mocked API class.
    hf_file_system.HfFileSystem.clear_instance_cache()
    monkeypatch.setattr(hf_file_system, "HfApi", MockedHfApi)


@pytest.fixture
def mock_hf_filesystem_open(pathlib_base, monkeypatch):
    """Patch ``HfFileSystem._open`` to open files below ``pathlib_base``.

    Tests using this fixture are skipped when ``huggingface_hub`` cannot be
    imported.
    """
    hf_file_system = pytest.importorskip(
        "huggingface_hub.hf_file_system",
        reason="hf tests require huggingface_hub",
    )

    def _open_local(fs, path, mode="rb", *args, **kwargs):
        # Only the in-repo part of the resolved path selects the local file.
        path_in_repo = fs.resolve_path(path).path_in_repo
        local_file = pathlib_base / path_in_repo
        return local_file.open(mode)

    monkeypatch.setattr(hf_file_system.HfFileSystem, "_open", _open_local)


@pytest.fixture
def hf_fixture_with_readonly_mocked_hf_api(
    hf_test_repo, mock_hf_api, mock_hf_filesystem_open
):
    """Return the ``hf://`` URL of the test repo.

    ``mock_hf_api`` and ``mock_hf_filesystem_open`` are requested only so
    that their patches are installed before the URL is used.
    """
    return f"hf://{hf_test_repo}"
97 changes: 97 additions & 0 deletions upath/tests/implementations/test_hf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import pytest
from fsspec import get_filesystem_class

from upath import UPath
from upath.implementations.cloud import HfPath

from ..cases import BaseTests

try:
    get_filesystem_class("hf")
except ImportError:
    # The "hf" filesystem class could not be imported: skip this whole module
    # and say why in the test report.
    pytestmark = pytest.mark.skip(reason="hf tests require huggingface_hub")


def test_hfpath():
    """An ``hf://`` URL yields an ``HfPath`` that reports it exists."""
    # NOTE(review): no mock fixture is requested here, so this presumably
    # talks to the real Hugging Face Hub - confirm that is intended.
    repo = UPath("hf://HuggingFaceTB/SmolLM2-135M")
    assert isinstance(repo, HfPath)
    assert repo.exists()


class TestUPathHttp(BaseTests):
    """Run the shared ``BaseTests`` suite against the mocked ``hf://`` repo.

    Tests that are disabled for this backend are marked as skipped with a
    reason, so the report shows them as skipped instead of as passing no-ops.
    """

    # NOTE(review): the class name looks like a leftover from the HTTP test
    # suite; it is kept unchanged here so existing test ids stay stable.

    # Shared marker for every inherited test disabled for this fixture.
    _read_only = pytest.mark.skip(
        reason="not exercised against the read-only mocked Hugging Face API"
    )

    @pytest.fixture(autouse=True, scope="function")
    def path(self, hf_fixture_with_readonly_mocked_hf_api):
        """Expose the mocked repo root as ``self.path`` for every test."""
        self.path = UPath(hf_fixture_with_readonly_mocked_hf_api)

    @_read_only
    def test_mkdir(self):
        pass

    @_read_only
    def test_mkdir_exists_ok_false(self):
        pass

    @_read_only
    def test_mkdir_exists_ok_true(self):
        pass

    @_read_only
    def test_mkdir_parents_true_exists_ok_true(self):
        pass

    @_read_only
    def test_mkdir_parents_true_exists_ok_false(self):
        pass

    @_read_only
    def test_makedirs_exist_ok_true(self):
        pass

    @_read_only
    def test_makedirs_exist_ok_false(self):
        pass

    @_read_only
    def test_touch(self):
        pass

    @_read_only
    def test_touch_unlink(self):
        pass

    @_read_only
    def test_write_bytes(self, pathlib_base):
        pass

    @_read_only
    def test_write_text(self, pathlib_base):
        pass

    @_read_only
    def test_fsspec_compat(self):
        pass

    @_read_only
    def test_rename(self):
        pass

    @_read_only
    def test_rename2(self):
        pass

    @_read_only
    def test_move_local(self, tmp_path):
        pass

    @_read_only
    def test_move_into_local(self, tmp_path):
        pass

    @_read_only
    def test_move_memory(self, clear_fsspec_memory_cache):
        pass

    @_read_only
    def test_move_into_memory(self, clear_fsspec_memory_cache):
        pass

    @pytest.mark.skip(reason="HfPath does not support listing repositories")
    def test_iterdir(self, local_testdir):
        pass

    @pytest.mark.skip(reason="HfPath does not support listing repositories")
    def test_iterdir2(self, local_testdir):
        pass
16 changes: 16 additions & 0 deletions upath/types/storage_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"GCSStorageOptions",
"S3StorageOptions",
"AzureStorageOptions",
"HfStorageOptions",
"DataStorageOptions",
"GitHubStorageOptions",
"HDFSStorageOptions",
Expand Down Expand Up @@ -182,6 +183,21 @@ class AzureStorageOptions(_AbstractStorageOptions, total=False):
assume_container_exists: bool | None # container existence assumptions


class HfStorageOptions(_AbstractStorageOptions, total=False):
    """Storage options for the Hugging Face filesystem."""

    # Authentication
    token: str | None

    # Connection settings
    endpoint: str | None

    # Performance settings
    # Block size for reading bytes; 0 = raw requests file-like objects
    block_size: int | None


class DataStorageOptions(_AbstractStorageOptions, total=False):
"""Storage options for Data URIs filesystem"""

Expand Down