diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py index 10676d8..91898f8 100644 --- a/larray_eurostat/tsv.py +++ b/larray_eurostat/tsv.py @@ -27,28 +27,58 @@ def transform_time_labels(label): return str_label -EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/" +EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data" + +# References +# ========== +# https://sdmx.org/sdmx_cdcl/ +# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_v2_1.docx +# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_implementation_20-10-2014.pdf +# A / normal value +# B / time series break (highest importance) +# D / definition differs +# E / estimated value +# F / forecast value +# G / experimental value +# H / missing value; holiday or weekend +# I / imputed value +# J / derogation +# K / Data included in another category +# L / missing value; data exist but were not collected +# M / missing value; data cannot exist +# N / not significant +# O / missing value +# P / provisional value +# Q / missing value; suppressed +# S / strike and other special events +# U / low reliability +# V / unvalidated value +# W / Includes data from another category +SDMX_2_1_FLAGS = ': abdefghijklmnopqsuvw' def _get_one(indicator, *, drop_markers=True): """Get one Eurostat indicator and return it as an array.""" - url = f"{EUROSTAT_BASEURL}{indicator}?format=TSV&compressed=true" + url = f"{EUROSTAT_BASEURL}/{indicator}?format=TSV&compressed=true" with urlopen(url) as f, gzip.open(f, mode='rt') as fgz: # noqa: S310 try: s = fgz.read() if drop_markers: - first_line_end = s.index('\n') + first_line_end = s.index('\n') + 1 # strip markers except on first line - s = s[:first_line_end] + _remove_chars(s[first_line_end:], ' dbefcuipsrzn:') + header_line = s[:first_line_end] + data_lines = s[first_line_end:].replace('@C', '') + s = header_line + _remove_chars(data_lines, + SDMX_2_1_FLAGS) la_data = read_eurostat(StringIO(s)) - + # Rename time axis. Rename time labels and reverse them (compatibility old API) la_data = la_data.rename(TIME_PERIOD='time') if np.issubdtype(la_data.time.dtype, np.character): la_data = la_data.set_labels('time', transform_time_labels) la_data = la_data.reverse('time') - + # If only one frequency: subset and return without redundant freq Axis (compatibility old API) if len(la_data.freq) == 1: return la_data[la_data.freq.i[0]] diff --git a/pyproject.toml b/pyproject.toml index 4f7ab52..083ddae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,13 +8,13 @@ requires = [ name = "larray_eurostat" version = "0.35-dev" description = "Additional package to import Eurostat files using LArray" -readme = "README.rst" +readme = { file = "README.rst", content-type = "text/x-rst" } authors = [ - {name = "Gaetan de Menten", email = "gdementen@gmail.com"}, {name = "Geert Bryon"}, - {name = "Alix Damman"}, - {name = "Johan Duyck"}, + {name = "Gaetan de Menten", email = "gdementen@gmail.com"}, + {name = "Alix Damman", email = "ald@plan.be"}, + {name = "Yannick Van den Abbeel"}, ] classifiers = [ "Development Status :: 4 - Beta", @@ -48,6 +48,10 @@ Homepage = "https://github.com/larray-project/larray_eurostat" Repository = "https://github.com/larray-project/larray_eurostat" Issues = "https://github.com/larray-project/larray_eurostat/issues" +[tool.setuptools.packages.find] +where = ["."] +namespaces = false + [tool.pytest.ini_options] minversion = "6.0" testpaths = [