1212import itertools
1313import numbers
1414import os
15- import platform
1615import re
1716import warnings
1817from collections .abc import Iterable
19- from urllib .parse import unquote , urlparse
18+ from pathlib import Path
19+ from typing import IO
20+ from urllib .parse import urlparse
2021
2122import fsspec
2223import numpy
2324import packaging .version
2425
25- win = platform .system ().lower ().startswith ("win" )
26+ import uproot .source .chunk
27+ import uproot .source .fsspec
28+ import uproot .source .object
2629
2730
2831def tobytes (array ):
@@ -36,7 +39,7 @@ def tobytes(array):
3639 return array .tostring ()
3740
3841
39- def isint (x ):
42+ def isint (x ) -> bool :
4043 """
4144 Returns True if and only if ``x`` is an integer (including NumPy, not
4245 including bool).
@@ -46,7 +49,7 @@ def isint(x):
4649 )
4750
4851
49- def isnum (x ):
52+ def isnum (x ) -> bool :
5053 """
5154 Returns True if and only if ``x`` is a number (including NumPy, not
5255 including bool).
@@ -56,7 +59,7 @@ def isnum(x):
5659 )
5760
5861
59- def ensure_str (x ):
62+ def ensure_str (x ) -> str :
6063 """
6164 Ensures that ``x`` is a string (decoding with 'surrogateescape' if necessary).
6265 """
@@ -94,18 +97,17 @@ def is_file_like(
9497 obj , readable : bool = False , writable : bool = False , seekable : bool = False
9598) -> bool :
9699 return (
97- callable (getattr (obj , "read" , None ))
98- and callable (getattr (obj , "write" , None ))
99- and callable (getattr (obj , "seek" , None ))
100- and callable (getattr (obj , "tell" , None ))
101- and callable (getattr (obj , "flush" , None ))
100+ all (
101+ callable (getattr (obj , attr , None ))
102+ for attr in ("read" , "write" , "seek" , "tell" , "flush" )
103+ )
102104 and (not readable or not hasattr (obj , "readable" ) or obj .readable ())
103105 and (not writable or not hasattr (obj , "writable" ) or obj .writable ())
104106 and (not seekable or not hasattr (obj , "seekable" ) or obj .seekable ())
105107 )
106108
107109
108- def parse_version (version ):
110+ def parse_version (version : str ):
109111 """
110112 Converts a semver string into a Version object that can be compared with
111113 ``<``, ``>=``, etc.
@@ -116,7 +118,7 @@ def parse_version(version):
116118 return packaging .version .parse (version )
117119
118120
119- def from_module (obj , module_name ) :
121+ def from_module (obj , module_name : str ) -> bool :
120122 """
121123 Returns True if ``obj`` is an instance of a class from a module
122124 given by name.
@@ -155,7 +157,7 @@ def _regularize_filter_regex_flags(flags):
155157 return flagsbyte
156158
157159
158- def no_filter (x ):
160+ def no_filter (x ) -> bool :
159161 """
160162 A filter that accepts anything (always returns True).
161163 """
@@ -285,10 +287,6 @@ def regularize_path(path):
285287 return path
286288
287289
288- _windows_drive_letter_ending = re .compile (r".*\b[A-Za-z]$" )
289- _windows_absolute_path_pattern = re .compile (r"^[A-Za-z]:[\\/]" )
290- _windows_absolute_path_pattern_slash = re .compile (r"^[\\/][A-Za-z]:[\\/]" )
291-
292290# These schemes may not appear in fsspec if the corresponding libraries are not installed (e.g. s3fs)
293291_remote_schemes = ["root" , "s3" , "http" , "https" ]
294292_schemes = list ({* _remote_schemes , * fsspec .available_protocols ()})
@@ -324,87 +322,48 @@ def file_object_path_split(urlpath: str) -> tuple[str, str | None]:
324322 return urlpath , obj
325323
326324
327- def file_path_to_source_class (file_path , options ):
325+ def file_path_to_source_class (
326+ file_path_or_object : str | Path | IO , options : dict
327+ ) -> tuple [type [uproot .source .chunk .Source ], str | IO ]:
328328 """
329329 Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it.
330330
331331 Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`.
332332 """
333333
334- import uproot .source .chunk
335-
336- file_path = regularize_path (file_path )
334+ file_path_or_object : str | IO = regularize_path (file_path_or_object )
337335
338336 source_cls = options ["handler" ]
339- if source_cls is not None :
340- if not (
341- isinstance (source_cls , type )
342- and issubclass (source_cls , uproot .source .chunk .Source )
337+ if source_cls is not None and not (
338+ isinstance (source_cls , type )
339+ and issubclass (source_cls , uproot .source .chunk .Source )
340+ ):
341+ raise TypeError (
342+ f"'handler' is not a class object inheriting from Source: { source_cls !r} "
343+ )
344+
345+ # Infer the source class from the file path
346+ if all (
347+ callable (getattr (file_path_or_object , attr , None )) for attr in ("read" , "seek" )
348+ ):
349+ # need a very soft object check for ubuntu python3.8 pyroot ci tests, cannot use uproot._util.is_file_like
350+ if (
351+ source_cls is not None
352+ and source_cls is not uproot .source .object .ObjectSource
343353 ):
344354 raise TypeError (
345- f"'handler' is not a class object inheriting from Source : { source_cls !r} "
355+ f"'handler' is not ObjectSource for a file-like object : { source_cls !r} "
346356 )
347- return source_cls , file_path
348-
349- if (
350- not isinstance (file_path , str )
351- and hasattr (file_path , "read" )
352- and hasattr (file_path , "seek" )
353- ):
354- source_cls = uproot .source .object .ObjectSource
355- return source_cls , file_path
356-
357- windows_absolute_path = None
358- if win and _windows_absolute_path_pattern .match (file_path ) is not None :
359- windows_absolute_path = file_path
360-
361- parsed_url = urlparse (file_path )
362- if parsed_url .scheme .lower () == "file" :
363- parsed_url_path = unquote (parsed_url .path )
357+ return uproot .source .object .ObjectSource , file_path_or_object
358+ elif isinstance (file_path_or_object , str ):
359+ source_cls = (
360+ uproot .source .fsspec .FSSpecSource if source_cls is None else source_cls
361+ )
362+ return source_cls , file_path_or_object
364363 else :
365- parsed_url_path = parsed_url .path
366-
367- if win and windows_absolute_path is None :
368- if _windows_absolute_path_pattern .match (parsed_url_path ) is not None :
369- windows_absolute_path = parsed_url_path
370- elif _windows_absolute_path_pattern_slash .match (parsed_url_path ) is not None :
371- windows_absolute_path = parsed_url_path [1 :]
372-
373- scheme = parsed_url .scheme .lower ()
374- if (
375- scheme == "file"
376- or len (parsed_url .scheme ) == 0
377- or windows_absolute_path is not None
378- ):
379- if windows_absolute_path is None :
380- if parsed_url .netloc .lower () == "localhost" :
381- file_path = parsed_url_path
382- else :
383- file_path = parsed_url .netloc + parsed_url_path
384- else :
385- file_path = windows_absolute_path
386-
387- # uproot.source.file.MemmapSource
388- source_cls = uproot .source .fsspec .FSSpecSource
389-
390- return source_cls , os .path .expanduser (file_path )
391-
392- elif scheme == "root" :
393- # uproot.source.xrootd.XRootDSource
394- source_cls = uproot .source .fsspec .FSSpecSource
395- return source_cls , file_path
396-
397- elif scheme == "s3" :
398- # uproot.source.s3.S3Source
399- source_cls = uproot .source .fsspec .FSSpecSource
400- return source_cls , file_path
401-
402- elif scheme in ("http" , "https" ):
403- # uproot.source.http.HTTPSource
404- source_cls = uproot .source .fsspec .FSSpecSource
405- return source_cls , file_path
406-
407- return uproot .source .fsspec .FSSpecSource , file_path
364+ raise TypeError (
365+ f"file_path is not a string or file-like object: { file_path_or_object !r} "
366+ )
408367
409368
410369if isinstance (__builtins__ , dict ):
@@ -448,7 +407,7 @@ def _file_not_found(files, message=None):
448407 )
449408
450409
451- def memory_size (data , error_message = None ):
410+ def memory_size (data , error_message = None ) -> int :
452411 """
453412 Regularizes strings like '## kB' and plain integer number of bytes to
454413 an integer number of bytes.
@@ -739,7 +698,7 @@ def damerau_levenshtein(a, b, ratio=False):
739698 # Modified Damerau-Levenshtein distance. Adds a middling penalty
740699 # for capitalization.
741700 # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
742- M = [[0 ] * (len (b ) + 1 ) for i in range (len (a ) + 1 )]
701+ M = [[0 ] * (len (b ) + 1 ) for _ in range (len (a ) + 1 )]
743702
744703 for i in range (len (a ) + 1 ):
745704 M [i ][0 ] = i
@@ -771,7 +730,7 @@ def damerau_levenshtein(a, b, ratio=False):
771730 # Transpose only
772731 M [i ][j ] = min (M [i ][j ], M [i - 2 ][j - 2 ] + 1 )
773732 else :
774- # Traspose and capitalization
733+ # Transpose and capitalization
775734 M [i ][j ] = min (M [i ][j ], M [i - 2 ][j - 2 ] + 1.5 )
776735
777736 if not ratio :
0 commit comments