Skip to content

Commit e2a85a3

Browse files
authored
fix(binaryread): raise/handle EOFError, deprecate vartype=str (#2226)
This fixes issues while reading some binary files with automatic precision detection, and also modernizes a few aspects of flopy.utils.binaryfile left over from Python 2. There are two changes to flopy.utils.binaryfile.binaryread(): * Raise EOFError if attempting to read data beyond the end of the file * Deprecate vartype=str, since bytes is the return type with Python 3 Other refactors: * Simplify conventional ASCII range checks by converting bytes to a list of int, then checking whether the values are within range * Remove checks for whether bytes are not str, and use .encode("ascii") where appropriate
1 parent c69990a commit e2a85a3

File tree

3 files changed

+108
-102
lines changed

3 files changed

+108
-102
lines changed

autotest/test_binaryfile.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,30 @@ def test_binaryread(example_data_path):
5353
np.testing.assert_array_equal(res, np.array([1, 1], np.int32))
5454
res = flopy.utils.binaryfile.binaryread(fp, np.float32, 2)
5555
np.testing.assert_array_equal(res, np.array([10, 10], np.float32))
56-
res = flopy.utils.binaryfile.binaryread(fp, str)
56+
res = flopy.utils.binaryfile.binaryread(fp, bytes)
5757
assert res == b" HEAD"
5858
res = flopy.utils.binaryfile.binaryread(fp, np.int32)
5959
assert res == 20
6060

6161

62+
def test_binaryread_misc(tmp_path):
    """Check the vartype=str deprecation and the EOFError paths of binaryread."""
    binaryread = flopy.utils.binaryfile.binaryread
    data_file = tmp_path / "data.file"

    # vartype=str must still return bytes, but with a deprecation warning
    data_file.write_bytes(b" data")
    with data_file.open("rb") as stream:
        with pytest.deprecated_call(match="vartype=str is deprecated"):
            res = binaryread(stream, str, charlen=5)
        assert res == b" data"

    # a one-byte file is too short for either request below
    data_file.write_bytes(b"\x00")
    too_long_requests = (
        (bytes, {"charlen": 6}),
        (np.int32, {}),
    )
    for vartype, extra in too_long_requests:
        with data_file.open("rb") as stream, pytest.raises(EOFError):
            binaryread(stream, vartype, **extra)
78+
79+
6280
def test_deprecated_binaryread_struct(example_data_path):
6381
# similar to test_binaryread(), but check the calls are deprecated
6482
pth = example_data_path / "freyberg" / "freyberg.githds"

autotest/test_cellbudgetfile.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,18 @@ def test_cellbudgetfile_build_index_mf6(example_data_path):
285285
)
286286

287287

288+
def test_cellbudgetfile_imeth_5(example_data_path):
    """Open a budget file with imeth=5 records and spot-check its headers."""
    cbc_path = example_data_path / "preserve_unitnums/testsfr2.ghb.cbc"
    # only the header table is inspected, so the handle is released right away
    with CellBudgetFile(cbc_path) as cbc:
        pass

    # check a few components
    headers = cbc.headers
    expected_index = pd.Index(np.arange(12, dtype=np.int64) * 156 + 64)
    pd.testing.assert_index_equal(headers.index, expected_index)
    assert headers.text.unique().tolist() == ["HEAD DEP BOUNDS"]
    assert headers.imeth.unique().tolist() == [5]
298+
299+
288300
@pytest.fixture
289301
def zonbud_model_path(example_data_path):
290302
return example_data_path / "zonbud_examples"

flopy/utils/binaryfile.py

Lines changed: 77 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,10 @@ class BinaryHeader(Header):
171171
172172
Parameters
173173
----------
174-
bintype : str
175-
Type of file being opened. Accepted values are 'head' and 'ucn'.
176-
precision : str
177-
Precision of floating point data in the file.
174+
bintype : str, default None
175+
Type of file being opened. Accepted values are 'head' and 'ucn'.
176+
precision : str, default 'single'
177+
Precision of floating point data in the file.
178178
179179
"""
180180

@@ -313,32 +313,47 @@ def binaryread_struct(file, vartype, shape=(1,), charlen=16):
313313

314314
def binaryread(file, vartype, shape=(1,), charlen=16):
    """
    Read character bytes, scalar or array values from a binary file.

    Parameters
    ----------
    file : file object
        is an open file object
    vartype : type
        is the return variable type: bytes, numpy.int32,
        numpy.float32, or numpy.float64 (any other value is handed to
        numpy.fromfile as the dtype). Using str is deprecated since
        bytes is preferred.
    shape : tuple, default (1,)
        is the shape of the returned array, for example
        shape = (nlay, nrow, ncol). The default (1,) returns a
        one-element array.
    charlen : int, default 16
        is the length of the character bytes. Note that arrays of bytes
        cannot be returned, only multi-character bytes. Shape has no
        effect on bytes.

    Returns
    -------
    bytes or numpy.ndarray
        bytes of length charlen if vartype is bytes (or str), otherwise
        an array with the requested shape.

    Raises
    ------
    EOFError
        If fewer bytes or values than requested could be read from file.

    """

    if vartype is str:
        # handle a hang-over from python2
        # stacklevel=2 attributes the warning to the caller rather than
        # to this line, so users can find the call that needs updating
        warnings.warn(
            "vartype=str is deprecated; use vartype=bytes instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        vartype = bytes

    if vartype is bytes:
        # read character bytes of length charlen
        result = file.read(charlen)
        if len(result) < charlen:
            raise EOFError(
                f"expected {charlen} bytes, but only {len(result)} "
                "could be read"
            )
        return result

    # find the number of values
    nval = np.prod(shape)
    result = np.fromfile(file, vartype, nval)
    if result.size < nval:
        # numpy.fromfile returns a short array instead of failing at EOF
        raise EOFError(
            f"expected {nval} values, but only {result.size} could be read"
        )
    if nval != 1:
        result = np.reshape(result, shape)
    return result
@@ -364,23 +379,18 @@ def get_headfile_precision(filename: Union[str, os.PathLike]):
364379
Parameters
365380
----------
366381
filename : str or PathLike
367-
Path of binary MODFLOW file to determine precision.
382+
Path of binary MODFLOW file to determine precision.
368383
369384
Returns
370385
-------
371-
result : str
372-
Result will be unknown, single, or double
386+
str
387+
Result will be unknown, single, or double
373388
374389
"""
375390

376391
# Set default result if neither single or double works
377392
result = "unknown"
378393

379-
# Create string containing set of ascii characters
380-
asciiset = " "
381-
for i in range(33, 127):
382-
asciiset += chr(i)
383-
384394
# Open file, and check filesize to ensure this is not an empty file
385395
f = open(filename, "rb")
386396
f.seek(0, 2)
@@ -399,15 +409,12 @@ def get_headfile_precision(filename: Union[str, os.PathLike]):
399409
("text", "S16"),
400410
]
401411
hdr = binaryread(f, vartype)
402-
text = hdr[0][4]
403-
try:
404-
text = text.decode()
405-
for t in text:
406-
if t.upper() not in asciiset:
407-
raise Exception()
412+
charbytes = list(hdr[0][4])
413+
if min(charbytes) >= 32 and max(charbytes) <= 126:
414+
# check if bytes are within conventional ASCII range
408415
result = "single"
409416
success = True
410-
except:
417+
else:
411418
success = False
412419

413420
# next try double
@@ -421,14 +428,10 @@ def get_headfile_precision(filename: Union[str, os.PathLike]):
421428
("text", "S16"),
422429
]
423430
hdr = binaryread(f, vartype)
424-
text = hdr[0][4]
425-
try:
426-
text = text.decode()
427-
for t in text:
428-
if t.upper() not in asciiset:
429-
raise Exception()
431+
charbytes = list(hdr[0][4])
432+
if min(charbytes) >= 32 and max(charbytes) <= 126:
430433
result = "double"
431-
except:
434+
else:
432435
f.close()
433436
raise ValueError(
434437
f"Could not determine the precision of the headfile {filename}"
@@ -1171,7 +1174,7 @@ def _set_precision(self, precision="single"):
11711174

11721175
try:
11731176
self._build_index()
1174-
except BudgetIndexError:
1177+
except (BudgetIndexError, EOFError):
11751178
success = False
11761179
self.__reset()
11771180

@@ -1201,20 +1204,14 @@ def _build_index(self):
12011204
Build the ordered dictionary, which maps the header information
12021205
to the position in the binary file.
12031206
"""
1204-
asciiset = " "
1205-
for i in range(33, 127):
1206-
asciiset += chr(i)
1207-
12081207
# read first record
12091208
header = self._get_header()
12101209
nrow = header["nrow"]
12111210
ncol = header["ncol"]
1212-
text = header["text"]
1213-
if isinstance(text, bytes):
1214-
text = text.decode()
1211+
text = header["text"].decode("ascii").strip()
12151212
if nrow < 0 or ncol < 0:
12161213
raise Exception("negative nrow, ncol")
1217-
if not text.endswith("FLOW-JA-FACE"):
1214+
if text != "FLOW-JA-FACE":
12181215
self.nrow = nrow
12191216
self.ncol = ncol
12201217
self.nlay = np.abs(header["nlay"])
@@ -1242,17 +1239,14 @@ def _build_index(self):
12421239
self.kstpkper.append(kstpkper)
12431240
if header["text"] not in self.textlist:
12441241
# check the precision of the file using text records
1245-
try:
1246-
tlist = [header["text"], header["modelnam"]]
1247-
for text in tlist:
1248-
if isinstance(text, bytes):
1249-
text = text.decode()
1250-
for t in text:
1251-
if t.upper() not in asciiset:
1252-
raise Exception()
1253-
1254-
except:
1255-
raise BudgetIndexError("Improper precision")
1242+
tlist = [header["text"], header["modelnam"]]
1243+
for text in tlist:
1244+
if len(text) == 0:
1245+
continue
1246+
charbytes = list(text)
1247+
if min(charbytes) < 32 or max(charbytes) > 126:
1248+
# not in conventional ASCII range
1249+
raise BudgetIndexError("Improper precision")
12561250
self.textlist.append(header["text"])
12571251
self.imethlist.append(header["imeth"])
12581252
if header["paknam"] not in self.paknamlist_from:
@@ -1279,23 +1273,15 @@ def _build_index(self):
12791273
"paknam2",
12801274
]:
12811275
s = header[itxt]
1282-
if isinstance(s, bytes):
1283-
s = s.decode()
12841276
print(f"{itxt}: {s}")
12851277
print("file position: ", ipos)
1286-
if (
1287-
header["imeth"].item() != 5
1288-
and header["imeth"].item() != 6
1289-
and header["imeth"].item() != 7
1290-
):
1278+
if header["imeth"].item() not in {5, 6, 7}:
12911279
print("")
12921280

12931281
# set the nrow, ncol, and nlay if they have not been set
12941282
if self.nrow == 0:
1295-
text = header["text"]
1296-
if isinstance(text, bytes):
1297-
text = text.decode()
1298-
if not text.endswith("FLOW-JA-FACE"):
1283+
text = header["text"].decode("ascii").strip()
1284+
if text != "FLOW-JA-FACE":
12991285
self.nrow = header["nrow"]
13001286
self.ncol = header["ncol"]
13011287
self.nlay = np.abs(header["nlay"])
@@ -1350,51 +1336,47 @@ def _skip_record(self, header):
13501336
nrow = header["nrow"]
13511337
ncol = header["ncol"]
13521338
imeth = header["imeth"]
1339+
realtype_nbytes = self.realtype(1).nbytes
13531340
if imeth == 0:
1354-
nbytes = nrow * ncol * nlay * self.realtype(1).nbytes
1341+
nbytes = nrow * ncol * nlay * realtype_nbytes
13551342
elif imeth == 1:
1356-
nbytes = nrow * ncol * nlay * self.realtype(1).nbytes
1343+
nbytes = nrow * ncol * nlay * realtype_nbytes
13571344
elif imeth == 2:
13581345
nlist = binaryread(self.file, np.int32)[0]
1359-
nbytes = nlist * (np.int32(1).nbytes + self.realtype(1).nbytes)
1346+
nbytes = nlist * (4 + realtype_nbytes)
13601347
elif imeth == 3:
1361-
nbytes = nrow * ncol * self.realtype(1).nbytes
1362-
nbytes += nrow * ncol * np.int32(1).nbytes
1348+
nbytes = nrow * ncol * realtype_nbytes + (nrow * ncol * 4)
13631349
elif imeth == 4:
1364-
nbytes = nrow * ncol * self.realtype(1).nbytes
1350+
nbytes = nrow * ncol * realtype_nbytes
13651351
elif imeth == 5:
13661352
nauxp1 = binaryread(self.file, np.int32)[0]
13671353
naux = nauxp1 - 1
1368-
1369-
for i in range(naux):
1370-
temp = binaryread(self.file, str, charlen=16)
1354+
naux_nbytes = naux * 16
1355+
if naux_nbytes:
1356+
check = self.file.seek(naux_nbytes, 1)
1357+
if check < naux_nbytes:
1358+
raise EOFError
13711359
nlist = binaryread(self.file, np.int32)[0]
13721360
if self.verbose:
13731361
print("naux: ", naux)
13741362
print("nlist: ", nlist)
13751363
print("")
1376-
nbytes = nlist * (
1377-
np.int32(1).nbytes
1378-
+ self.realtype(1).nbytes
1379-
+ naux * self.realtype(1).nbytes
1380-
)
1364+
nbytes = nlist * (4 + realtype_nbytes + naux * realtype_nbytes)
13811365
elif imeth == 6:
13821366
# read rest of list data
13831367
nauxp1 = binaryread(self.file, np.int32)[0]
13841368
naux = nauxp1 - 1
1385-
1386-
for i in range(naux):
1387-
temp = binaryread(self.file, str, charlen=16)
1369+
naux_nbytes = naux * 16
1370+
if naux_nbytes:
1371+
check = self.file.seek(naux_nbytes, 1)
1372+
if check < naux_nbytes:
1373+
raise EOFError
13881374
nlist = binaryread(self.file, np.int32)[0]
13891375
if self.verbose:
13901376
print("naux: ", naux)
13911377
print("nlist: ", nlist)
13921378
print("")
1393-
nbytes = nlist * (
1394-
np.int32(1).nbytes * 2
1395-
+ self.realtype(1).nbytes
1396-
+ naux * self.realtype(1).nbytes
1397-
)
1379+
nbytes = nlist * (4 * 2 + realtype_nbytes + naux * realtype_nbytes)
13981380
else:
13991381
raise Exception(f"invalid method code {imeth}")
14001382
if nbytes != 0:
@@ -1418,10 +1400,10 @@ def _get_header(self):
14181400
for name in temp.dtype.names:
14191401
header2[name] = temp[name]
14201402
if header2["imeth"].item() == 6:
1421-
header2["modelnam"] = binaryread(self.file, str, charlen=16)
1422-
header2["paknam"] = binaryread(self.file, str, charlen=16)
1423-
header2["modelnam2"] = binaryread(self.file, str, charlen=16)
1424-
header2["paknam2"] = binaryread(self.file, str, charlen=16)
1403+
header2["modelnam"] = binaryread(self.file, bytes, charlen=16)
1404+
header2["paknam"] = binaryread(self.file, bytes, charlen=16)
1405+
header2["modelnam2"] = binaryread(self.file, bytes, charlen=16)
1406+
header2["paknam2"] = binaryread(self.file, bytes, charlen=16)
14251407
else:
14261408
header2 = np.array(
14271409
[(0, 0.0, 0.0, 0.0, "", "", "", "")], dtype=self.header2_dtype
@@ -1951,9 +1933,7 @@ def get_record(self, idx, full3D=False):
19511933
self.file.seek(ipos, 0)
19521934
imeth = header["imeth"][0]
19531935

1954-
t = header["text"][0]
1955-
if isinstance(t, bytes):
1956-
t = t.decode("utf-8")
1936+
t = header["text"][0].decode("ascii")
19571937
s = f"Returning {t.strip()} as "
19581938

19591939
nlay = abs(header["nlay"][0])
@@ -2039,10 +2019,8 @@ def get_record(self, idx, full3D=False):
20392019
naux = nauxp1 - 1
20402020
l = [("node", np.int32), ("q", self.realtype)]
20412021
for i in range(naux):
2042-
auxname = binaryread(self.file, str, charlen=16)
2043-
if not isinstance(auxname, str):
2044-
auxname = auxname.decode()
2045-
l.append((auxname.strip(), self.realtype))
2022+
auxname = binaryread(self.file, bytes, charlen=16)
2023+
l.append((auxname.decode("ascii").strip(), self.realtype))
20462024
dtype = np.dtype(l)
20472025
nlist = binaryread(self.file, np.int32)[0]
20482026
data = binaryread(self.file, dtype, shape=(nlist,))
@@ -2064,10 +2042,8 @@ def get_record(self, idx, full3D=False):
20642042
naux = nauxp1 - 1
20652043
l = [("node", np.int32), ("node2", np.int32), ("q", self.realtype)]
20662044
for i in range(naux):
2067-
auxname = binaryread(self.file, str, charlen=16)
2068-
if not isinstance(auxname, str):
2069-
auxname = auxname.decode()
2070-
l.append((auxname.strip(), self.realtype))
2045+
auxname = binaryread(self.file, bytes, charlen=16)
2046+
l.append((auxname.decode("ascii").strip(), self.realtype))
20712047
dtype = np.dtype(l)
20722048
nlist = binaryread(self.file, np.int32)[0]
20732049
data = binaryread(self.file, dtype, shape=(nlist,))

0 commit comments

Comments
 (0)