From 1bb226cdc50a736bae1e3140e2507a5a44db2406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 16 Dec 2025 17:07:17 +0100 Subject: [PATCH 1/6] CLN: nicer code --- larray_eurostat/tsv.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py index 10676d8..3422704 100644 --- a/larray_eurostat/tsv.py +++ b/larray_eurostat/tsv.py @@ -27,28 +27,30 @@ def transform_time_labels(label): return str_label -EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/" +EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data" +FLAGS = ': bcdefinprsuz' def _get_one(indicator, *, drop_markers=True): """Get one Eurostat indicator and return it as an array.""" - url = f"{EUROSTAT_BASEURL}{indicator}?format=TSV&compressed=true" + url = f"{EUROSTAT_BASEURL}/{indicator}?format=TSV&compressed=true" with urlopen(url) as f, gzip.open(f, mode='rt') as fgz: # noqa: S310 try: s = fgz.read() if drop_markers: first_line_end = s.index('\n') # strip markers except on first line - s = s[:first_line_end] + _remove_chars(s[first_line_end:], ' dbefcuipsrzn:') + s = s[:first_line_end] + _remove_chars(s[first_line_end:], + FLAGS) la_data = read_eurostat(StringIO(s)) - + # Rename time axis. Rename time labels and reverse them (compatibility old API) la_data = la_data.rename(TIME_PERIOD='time') if np.issubdtype(la_data.time.dtype, np.character): la_data = la_data.set_labels('time', transform_time_labels) la_data = la_data.reverse('time') - + # If only one frequency: subset and return without redundant freq Axis (compatibility old API) if len(la_data.freq) == 1: return la_data[la_data.freq.i[0]] From e6b533c43b78e726a4992ceaece5d740df482989 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 16 Dec 2025 17:10:15 +0100 Subject: [PATCH 2/6] FIX: update flags to ignore for new API --- larray_eurostat/tsv.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py index 3422704..a2a6915 100644 --- a/larray_eurostat/tsv.py +++ b/larray_eurostat/tsv.py @@ -29,7 +29,33 @@ def transform_time_labels(label): EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data" -FLAGS = ': bcdefinprsuz' +# References +# ========== +# https://sdmx.org/sdmx_cdcl/ +# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_v2_1.docx +# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_implementation_20-10-2014.pdf +# A / normal value +# B / time series break (highest importance) +# D / definition differs +# E / estimated value +# F / forecast value +# G / experimental value +# H / missing value; holiday or weekend +# I / imputed value +# J / derogation +# K / Data included in another category +# L / missing value; data exist but were not collected +# M / missing value; data cannot exist +# N / not significant +# O / missing value +# P / provisional value +# Q / missing value; suppressed +# S / strike and other special events +# U / low reliability +# V / unvalidated value +# W / Includes data from another category +SDMX_2_1_FLAGS = ': abdefghijklmnopqsuvw' + def _get_one(indicator, *, drop_markers=True): """Get one Eurostat indicator and return it as an array.""" @@ -41,7 +67,7 @@ def _get_one(indicator, *, drop_markers=True): first_line_end = s.index('\n') # strip markers except on first line s = s[:first_line_end] + _remove_chars(s[first_line_end:], - FLAGS) + SDMX_2_1_FLAGS) la_data = read_eurostat(StringIO(s)) From 9e2dc26cc19089f03ec272a307fcec13775f5675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 16 Dec 2025 18:00:46 +0100 Subject: [PATCH 3/6] FIX: fixed building the package --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4f7ab52..adefc2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,10 @@ Homepage = "https://github.com/larray-project/larray_eurostat" Repository = "https://github.com/larray-project/larray_eurostat" Issues = "https://github.com/larray-project/larray_eurostat/issues" +[tool.setuptools.packages.find] +where = ["."] +namespaces = false + [tool.pytest.ini_options] minversion = "6.0" testpaths = [ From 02866079671ca02fe150a448e2921ed0f59c5593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Wed, 17 Dec 2025 17:46:50 +0100 Subject: [PATCH 4/6] DOC: update authors IIRC, Geert initially wrote eurostat_get so should be first --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index adefc2a..8179131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,10 +11,10 @@ description = "Additional package to import Eurostat files using LArray" readme = "README.rst" authors = [ - {name = "Gaetan de Menten", email = "gdementen@gmail.com"}, {name = "Geert Bryon"}, - {name = "Alix Damman"}, - {name = "Johan Duyck"}, + {name = "Gaetan de Menten", email = "gdementen@gmail.com"}, + {name = "Alix Damman", email = "ald@plan.be"}, + {name = "Yannick Van den Abbeel"}, ] classifiers = [ "Development Status :: 4 - Beta", From bb1469b9fd05a42778b48b6045ee95239c93f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Wed, 17 Dec 2025 18:02:29 +0100 Subject: [PATCH 5/6] CLN: explicit README content type --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8179131..083ddae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ requires = [ name = "larray_eurostat" version = "0.35-dev" description = "Additional package to import Eurostat files using LArray" -readme = "README.rst" +readme = { file = "README.rst", content-type = "text/x-rst" } authors = [ {name = "Geert Bryon"}, From 4157ee5ec23db3ae3d1e7f7469f1f786fc8a6443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 23 Dec 2025 13:36:43 +0100 Subject: [PATCH 6/6] FIX: ignore @C in data cells --- larray_eurostat/tsv.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py index a2a6915..91898f8 100644 --- a/larray_eurostat/tsv.py +++ b/larray_eurostat/tsv.py @@ -64,10 +64,12 @@ def _get_one(indicator, *, drop_markers=True): try: s = fgz.read() if drop_markers: - first_line_end = s.index('\n') + first_line_end = s.index('\n') + 1 # strip markers except on first line - s = s[:first_line_end] + _remove_chars(s[first_line_end:], - SDMX_2_1_FLAGS) + header_line = s[:first_line_end] + data_lines = s[first_line_end:].replace('@C', '') + s = header_line + _remove_chars(data_lines, + SDMX_2_1_FLAGS) la_data = read_eurostat(StringIO(s))