larray-project · gdementen · Dec 23, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py
@@ -27,28 +27,58 @@ def transform_time_labels(label):
         return str_label
 
 
-EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/"
+EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data"
+
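+# References: SDMX CL_OBS_STATUS codes (matched in lowercase by SDMX_2_1_FLAGS)
+# =============================================================================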
+# https://sdmx.org/sdmx_cdcl/
+# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_v2_1.docx
+# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_implementation_20-10-2014.pdf
+# A / normal value
+# B / time series break (highest importance)
+# D / definition differs
+# E / estimated value
+# F / forecast value
+# G / experimental value
+# H / missing value; holiday or weekend
+# I / imputed value
+# J / derogation
+# K / data included in another category
+# L / missing value; data exist but were not collected
+# M / missing value; data cannot exist
+# N / not significant
+# O / missing value
+# P / provisional value
+# Q / missing value; suppressed
+# S / strike and other special events
+# U / low reliability
+# V / unvalidated value
+# W / includes data from another category
+SDMX_2_1_FLAGS = ': abdefghijklmnopqsuvw'
 
 
 def _get_one(indicator, *, drop_markers=True):
     """Get one Eurostat indicator and return it as an array."""
-    url = f"{EUROSTAT_BASEURL}{indicator}?format=TSV&compressed=true"
+    url = f"{EUROSTAT_BASEURL}/{indicator}?format=TSV&compressed=true"
     with urlopen(url) as f, gzip.open(f, mode='rt') as fgz:    # noqa: S310
         try:
             s = fgz.read()
             if drop_markers:
-                first_line_end = s.index('\n')
+                first_line_end = s.index('\n') + 1
                 # strip markers except on first line
-                s = s[:first_line_end] + _remove_chars(s[first_line_end:], ' dbefcuipsrzn:')
+                header_line = s[:first_line_end]
+                data_lines = s[first_line_end:].replace('@C', '')
+                s = header_line + _remove_chars(data_lines,
+                                                SDMX_2_1_FLAGS)
 
             la_data = read_eurostat(StringIO(s))
-            
+
             # Rename time axis. Rename time labels and reverse them (compatibility old API)
             la_data = la_data.rename(TIME_PERIOD='time')
             if np.issubdtype(la_data.time.dtype, np.character):
                 la_data = la_data.set_labels('time', transform_time_labels)
             la_data = la_data.reverse('time')
-            
+
             # If only one frequency: subset and return without redundant freq Axis (compatibility old API)
             if len(la_data.freq) == 1:
                 return la_data[la_data.freq.i[0]]

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,13 +8,13 @@ requires = [
 name = "larray_eurostat"
 version = "0.35-dev"
 description = "Additional package to import Eurostat files using LArray"
-readme = "README.rst"
+readme = { file = "README.rst", content-type = "text/x-rst" }
 
 authors = [
-  {name = "Gaetan de Menten", email = "[email protected]"},
   {name = "Geert Bryon"},
-  {name = "Alix Damman"},
-  {name = "Johan Duyck"},
+  {name = "Gaetan de Menten", email = "[email protected]"},
+  {name = "Alix Damman", email = "[email protected]"},
+  {name = "Yannick Van den Abbeel"},
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -48,6 +48,10 @@ Homepage = "https://github.com/larray-project/larray_eurostat"
 Repository = "https://github.com/larray-project/larray_eurostat"
 Issues = "https://github.com/larray-project/larray_eurostat/issues"
 
+[tool.setuptools.packages.find]
+where = ["."]
+namespaces = false
+
 [tool.pytest.ini_options]
 minversion = "6.0"
 testpaths = [