From 1bb226cdc50a736bae1e3140e2507a5a44db2406 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 16 Dec 2025 17:07:17 +0100
Subject: [PATCH 1/6] CLN: extract FLAGS constant and join URL parts with "/"

---
 larray_eurostat/tsv.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py
index 10676d8..3422704 100644
--- a/larray_eurostat/tsv.py
+++ b/larray_eurostat/tsv.py
@@ -27,28 +27,30 @@ def transform_time_labels(label):
         return str_label
 
 
-EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/"
+EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data"
 
+FLAGS = ': bcdefinprsuz'
 
 def _get_one(indicator, *, drop_markers=True):
     """Get one Eurostat indicator and return it as an array."""
-    url = f"{EUROSTAT_BASEURL}{indicator}?format=TSV&compressed=true"
+    url = f"{EUROSTAT_BASEURL}/{indicator}?format=TSV&compressed=true"
     with urlopen(url) as f, gzip.open(f, mode='rt') as fgz:    # noqa: S310
         try:
             s = fgz.read()
             if drop_markers:
                 first_line_end = s.index('\n')
                 # strip markers except on first line
-                s = s[:first_line_end] + _remove_chars(s[first_line_end:], ' dbefcuipsrzn:')
+                s = s[:first_line_end] + _remove_chars(s[first_line_end:],
+                                                       FLAGS)
 
             la_data = read_eurostat(StringIO(s))
-            
+
             # Rename time axis. Rename time labels and reverse them (compatibility old API)
             la_data = la_data.rename(TIME_PERIOD='time')
             if np.issubdtype(la_data.time.dtype, np.character):
                 la_data = la_data.set_labels('time', transform_time_labels)
             la_data = la_data.reverse('time')
-            
+
             # If only one frequency: subset and return without redundant freq Axis (compatibility old API)
             if len(la_data.freq) == 1:
                 return la_data[la_data.freq.i[0]]

From e6b533c43b78e726a4992ceaece5d740df482989 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 16 Dec 2025 17:10:15 +0100
Subject: [PATCH 2/6] FIX: update flags to ignore for new API

---
 larray_eurostat/tsv.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py
index 3422704..a2a6915 100644
--- a/larray_eurostat/tsv.py
+++ b/larray_eurostat/tsv.py
@@ -29,7 +29,33 @@ def transform_time_labels(label):
 
 EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data"
 
-FLAGS = ': bcdefinprsuz'
+# References
+# ==========
+# https://sdmx.org/sdmx_cdcl/
+# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_v2_1.docx
+# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_implementation_20-10-2014.pdf
+# A / normal value
+# B / time series break (highest importance)
+# D / definition differs
+# E / estimated value
+# F / forecast value
+# G / experimental value
+# H / missing value; holiday or weekend
+# I / imputed value
+# J / derogation
+# K / data included in another category
+# L / missing value; data exist but were not collected
+# M / missing value; data cannot exist
+# N / not significant
+# O / missing value
+# P / provisional value
+# Q / missing value; suppressed
+# S / strike and other special events
+# U / low reliability
+# V / unvalidated value
+# W / includes data from another category
+SDMX_2_1_FLAGS = ': abdefghijklmnopqsuvw'
+
 
 def _get_one(indicator, *, drop_markers=True):
     """Get one Eurostat indicator and return it as an array."""
@@ -41,7 +67,7 @@ def _get_one(indicator, *, drop_markers=True):
                 first_line_end = s.index('\n')
                 # strip markers except on first line
                 s = s[:first_line_end] + _remove_chars(s[first_line_end:],
-                                                       FLAGS)
+                                                       SDMX_2_1_FLAGS)
 
             la_data = read_eurostat(StringIO(s))
 

From 9e2dc26cc19089f03ec272a307fcec13775f5675 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 16 Dec 2025 18:00:46 +0100
Subject: [PATCH 3/6] FIX: fixed building the package

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 4f7ab52..adefc2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,10 @@ Homepage = "https://github.com/larray-project/larray_eurostat"
 Repository = "https://github.com/larray-project/larray_eurostat"
 Issues = "https://github.com/larray-project/larray_eurostat/issues"
 
+[tool.setuptools.packages.find]
+where = ["."]
+namespaces = false
+
 [tool.pytest.ini_options]
 minversion = "6.0"
 testpaths = [

From 02866079671ca02fe150a448e2921ed0f59c5593 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 17 Dec 2025 17:46:50 +0100
Subject: [PATCH 4/6] DOC: update authors

IIRC, Geert initially wrote eurostat_get, so he should be listed first.
---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index adefc2a..8179131 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,10 +11,10 @@ description = "Additional package to import Eurostat files using LArray"
 readme = "README.rst"
 
 authors = [
-  {name = "Gaetan de Menten", email = "gdementen@gmail.com"},
   {name = "Geert Bryon"},
-  {name = "Alix Damman"},
-  {name = "Johan Duyck"},
+  {name = "Gaetan de Menten", email = "gdementen@gmail.com"},
+  {name = "Alix Damman", email = "ald@plan.be"},
+  {name = "Yannick Van den Abbeel"},
 ]
 classifiers = [
     "Development Status :: 4 - Beta",

From bb1469b9fd05a42778b48b6045ee95239c93f195 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 17 Dec 2025 18:02:29 +0100
Subject: [PATCH 5/6] CLN: explicit README content type

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8179131..083ddae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ requires = [
 name = "larray_eurostat"
 version = "0.35-dev"
 description = "Additional package to import Eurostat files using LArray"
-readme = "README.rst"
+readme = { file = "README.rst", content-type = "text/x-rst" }
 
 authors = [
   {name = "Geert Bryon"},

From 4157ee5ec23db3ae3d1e7f7469f1f786fc8a6443 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 23 Dec 2025 13:36:43 +0100
Subject: [PATCH 6/6] FIX: ignore @C in data cells

---
 larray_eurostat/tsv.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/larray_eurostat/tsv.py b/larray_eurostat/tsv.py
index a2a6915..91898f8 100644
--- a/larray_eurostat/tsv.py
+++ b/larray_eurostat/tsv.py
@@ -64,10 +64,12 @@ def _get_one(indicator, *, drop_markers=True):
         try:
             s = fgz.read()
             if drop_markers:
-                first_line_end = s.index('\n')
+                first_line_end = s.index('\n') + 1
                 # strip markers except on first line
-                s = s[:first_line_end] + _remove_chars(s[first_line_end:],
-                                                       SDMX_2_1_FLAGS)
+                header_line = s[:first_line_end]
+                data_lines = s[first_line_end:].replace('@C', '')
+                s = header_line + _remove_chars(data_lines,
+                                                SDMX_2_1_FLAGS)
 
             la_data = read_eurostat(StringIO(s))