Skip to content
Merged

035 #35

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions larray_eurostat/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,58 @@ def transform_time_labels(label):
return str_label


EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/"
EUROSTAT_BASEURL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data"

# References
# ==========
# https://sdmx.org/sdmx_cdcl/
# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_v2_1.docx
# https://sdmx.org/wp-content/uploads/CL_OBS_STATUS_implementation_20-10-2014.pdf
# A / normal value
# B / time series break (highest importance)
# D / definition differs
# E / estimated value
# F / forecast value
# G / experimental value
# H / missing value; holiday or weekend
# I / imputed value
# J / derogation
# K / data included in another category
# L / missing value; data exist but were not collected
# M / missing value; data cannot exist
# N / not significant
# O / missing value
# P / provisional value
# Q / missing value; suppressed
# S / strike and other special events
# U / low reliability
# V / unvalidated value
# W / includes data from another category
SDMX_2_1_FLAGS = ': abdefghijklmnopqsuvw'


def _get_one(indicator, *, drop_markers=True):
"""Get one Eurostat indicator and return it as an array."""
url = f"{EUROSTAT_BASEURL}{indicator}?format=TSV&compressed=true"
url = f"{EUROSTAT_BASEURL}/{indicator}?format=TSV&compressed=true"
with urlopen(url) as f, gzip.open(f, mode='rt') as fgz: # noqa: S310
try:
s = fgz.read()
if drop_markers:
first_line_end = s.index('\n')
first_line_end = s.index('\n') + 1
# strip markers except on first line
s = s[:first_line_end] + _remove_chars(s[first_line_end:], ' dbefcuipsrzn:')
header_line = s[:first_line_end]
data_lines = s[first_line_end:].replace('@C', '')
s = header_line + _remove_chars(data_lines,
SDMX_2_1_FLAGS)

la_data = read_eurostat(StringIO(s))

# Rename the time axis, then rename the time labels and reverse them (for compatibility with the old API)
la_data = la_data.rename(TIME_PERIOD='time')
if np.issubdtype(la_data.time.dtype, np.character):
la_data = la_data.set_labels('time', transform_time_labels)
la_data = la_data.reverse('time')

# If there is only one frequency, subset on it and return the array without the redundant freq axis (for compatibility with the old API)
if len(la_data.freq) == 1:
return la_data[la_data.freq.i[0]]
Expand Down
12 changes: 8 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ requires = [
name = "larray_eurostat"
version = "0.35-dev"
description = "Additional package to import Eurostat files using LArray"
readme = "README.rst"
readme = { file = "README.rst", content-type = "text/x-rst" }

authors = [
{name = "Gaetan de Menten", email = "[email protected]"},
{name = "Geert Bryon"},
{name = "Alix Damman"},
{name = "Johan Duyck"},
{name = "Gaetan de Menten", email = "[email protected]"},
{name = "Alix Damman", email = "[email protected]"},
{name = "Yannick Van den Abbeel"},
]
classifiers = [
"Development Status :: 4 - Beta",
Expand Down Expand Up @@ -48,6 +48,10 @@ Homepage = "https://github.com/larray-project/larray_eurostat"
Repository = "https://github.com/larray-project/larray_eurostat"
Issues = "https://github.com/larray-project/larray_eurostat/issues"

[tool.setuptools.packages.find]
where = ["."]
namespaces = false

[tool.pytest.ini_options]
minversion = "6.0"
testpaths = [
Expand Down