Skip to content

Commit cbf21a9

Browse files
committed
Testing pyesgf package
1 parent 33cb775 commit cbf21a9

1 file changed

Lines changed: 93 additions & 9 deletions

File tree

stitches/fx_esgf_api.py

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@
1010
import pandas as pd
1111
import xarray as xr
1212
import numpy as np
13+
import os
14+
from pyesgf.search import SearchConnection
15+
16+
17+
# ESGF nodes whose `/esg-search` endpoint get_esgf_data() queries, tried in
# the order listed here.
DATA_NODES = [
    "esgf.ceda.ac.uk",
    "esgf-data.dkrz.de",
]
1321

1422

1523
# Author: Unknown
@@ -132,6 +140,77 @@ def format_esgf_result(result):
132140
return result_df
133141

134142

143+
def get_esgf_data(res, var, scen, esm, mem, start_year, end_year, time_chunk=300):
    """
    Search ESGF with pyesgf and lazily open the matching CMIP6 files with xarray.

    Every node in ``DATA_NODES`` is tried in order. On each node the search
    results are walked one at a time; the first result whose files cover
    ``start_year`` through ``end_year`` is opened from its OpenDAP URLs and
    returned. Failures on a single result or node are reported with ``print``
    and the next candidate is tried.

    :param res:         value of the ``frequency`` search facet
    :param var:         value of the ``variable`` search facet
    :param scen:        value of the ``experiment_id`` search facet
    :param esm:         value of the ``source_id`` search facet
    :param mem:         value of the ``variant_label`` search facet
    :param start_year:  first year that must be covered (inclusive)
    :param end_year:    last year that must be covered (inclusive)
    :param time_chunk:  chunk size along ``time`` handed to ``xr.open_mfdataset``
    :return:            the dataset returned by ``xr.open_mfdataset``, or
                        ``None`` when no node/result could satisfy the request
    """
    # Set before any search so pyesgf does not warn about unspecified facets.
    os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = 'True'

    for node in DATA_NODES:
        print(f'\t - Attempting to use data node {node}:')
        try:
            # Establish connection to data node
            conn = SearchConnection(f'https://{node}/esg-search', distrib=True)

            # Initiate query
            ctx = conn.new_context(
                project='CMIP6',
                source_id=esm,
                experiment_id=scen,
                variable=var,
                frequency=res,
                variant_label=mem,
            )

            # Number of search results (one per matching dataset copy)
            hit_count = ctx.hit_count

            # If no results, throw error to be caught by the handler below
            if hit_count == 0:
                raise FileNotFoundError(
                    f"The specified combination did not return any results on {node}."
                )

            # Run the search once; indexing the same result set in the loop
            # avoids re-issuing the query on every iteration.
            results = ctx.search()

            # Iterate through results to try opening the data
            for i in range(hit_count):
                print(f'\t\t - {hit_count} servers found. Trying server {i+1}.')
                try:
                    files = results[i].file_context().search()
                    # Extract OpenDAP urls
                    urls = [f.opendap_url for f in files]
                    selected, covered = _select_files_for_period(
                        urls, start_year, end_year
                    )
                    # Check that full time period is covered
                    if not covered:
                        raise FileNotFoundError('Not all data available.')
                    # Open data with Xarray
                    data = xr.open_mfdataset(selected, chunks={'time': time_chunk})
                    print('\t\t - Success.')
                    return data
                except Exception as e:
                    # Best effort: report why this result failed, try the next.
                    print(f'\t\t - Failed using server {i+1}: {e}')
        except Exception as e:
            # Best effort: report why this node failed, try the next.
            print(f'\t - Failed using node {node}: {e}')

    print('Could not resolve request.')
    return None


def _select_files_for_period(urls, start_year, end_year):
    """
    Pick the URLs whose file period overlaps ``start_year``-``end_year``.

    The period is parsed from the last ``_``-separated token of the file name,
    which is assumed to look like ``<start>-<end>.nc`` with each bound
    beginning with a four-digit year. Entries that are empty/``None`` or whose
    name does not match that pattern are skipped.

    :param urls:        iterable of URL strings (entries may be ``None``)
    :param start_year:  first year that must be covered (inclusive)
    :param end_year:    last year that must be covered (inclusive)
    :return:            ``(selected, covered)`` where ``selected`` is the list
                        of overlapping URLs and ``covered`` is True when the
                        earliest selected start year is <= ``start_year`` and
                        the latest selected end year is >= ``end_year``
    """
    selected = []
    first_years = []
    last_years = []
    for url in urls:
        if not url:
            # No OpenDAP endpoint published for this file.
            continue
        period = url.split('/')[-1].split('_')[-1]
        try:
            bounds = period.split('-')
            file_start = int(bounds[0][0:4])
            file_end = int(bounds[1][0:4])
        except (IndexError, ValueError):
            # File name carries no parsable "<start>-<end>" range.
            continue
        # Keep the file unless it lies entirely before or after the period.
        if file_end < start_year or file_start > end_year:
            continue
        selected.append(url)
        first_years.append(file_start)
        last_years.append(file_end)

    if not selected:
        return selected, False

    # Bounds are inclusive: a file starting exactly in start_year (or ending
    # exactly in end_year) does cover that year.
    covered = min(first_years) <= start_year and max(last_years) >= end_year
    return selected, covered
212+
213+
135214
def get_recipe_entry_data(row, res='day', variable = 'tas'):
136215
"""
137216
Downloads the data associated to a given entry in the recipe
@@ -145,16 +224,21 @@ def get_recipe_entry_data(row, res='day', variable = 'tas'):
145224
# Message:
146225
print(f'\t - Searching ESGF for archive data {row.archive_start_yr}-{row.archive_end_yr} to use as target period {row.target_start_yr}-{row.target_end_yr}', flush=True)
147226

148-
# Do the ESGF API search
149-
result = esgf_search(
150-
table_id=res, variable_id=variable, experiment_id=row.archive_experiment,
151-
source_id=row.archive_model, member_id=row.archive_ensemble
152-
)
227+
# # Do the ESGF API search
228+
# result = esgf_search(
229+
# table_id=res, variable_id=variable, experiment_id=row.archive_experiment,
230+
# source_id=row.archive_model, member_id=row.archive_ensemble
231+
# )
153232

154-
# Format the results
155-
result_df = format_esgf_result(result)
233+
# # Format the results
234+
# result_df = format_esgf_result(result)
235+
236+
# # Download the data, ensuring it contains the required period defined by the recipe
237+
# df = get_df_from_esgf(result_df, row.archive_start_yr, row.archive_end_yr)
156238

157-
# Download the data, ensuring it contains the required period defined by the recipe
158-
df = get_df_from_esgf(result_df, row.archive_start_yr, row.archive_end_yr)
239+
df = get_esgf_data(
240+
res=res, var=variable, scen=row.archive_experiment,
241+
mem=row.archive_ensemble, esm=row.archive_model,
242+
start_year=row.archive_start_yr, end_year=row.archive_end_yr)
159243

160244
return df

0 commit comments

Comments
 (0)