1010import pandas as pd
1111import xarray as xr
1212import numpy as np
13+ import os
14+ from pyesgf .search import SearchConnection
15+
16+
# ESGF index nodes, listed in the order get_esgf_data tries them;
# a later entry is only contacted when every earlier one has failed.
DATA_NODES = [
    'esgf.ceda.ac.uk',
    'esgf-data.dkrz.de',
]
1321
1422
1523# Author: Unknown
@@ -132,6 +140,77 @@ def format_esgf_result(result):
132140 return result_df
133141
134142
def _parse_file_years(url):
    """Return ``(first_year, last_year)`` read from a file URL's date-range suffix.

    The last ``_``-separated piece of the file name is expected to look like
    ``<start>-<end>`` (optionally followed by an extension), with both dates
    beginning with a four-digit year.

    Raises:
        ValueError: if the suffix has no ``-`` separator or a year is not numeric.
    """
    date_range = url.split('/')[-1].split('_')[-1]
    first, last = date_range.split('-')[:2]
    return int(first[:4]), int(last[:4])


def _select_files_for_period(urls, start_year, end_year):
    """Pick the URLs overlapping ``[start_year, end_year]`` and report coverage.

    Returns:
        ``(selected_urls, fully_covered)``. ``fully_covered`` is True only when
        at least one file was selected, the earliest selected file starts in or
        before ``start_year`` and the latest one ends in or after ``end_year``.
        Gaps between consecutive files are not detected.

    Raises:
        FileNotFoundError: if any entry of ``urls`` is ``None``.
        ValueError: if a file name does not carry a parseable date range.
    """
    selected = []
    first_years = []
    last_years = []
    for url in urls:
        if url is None:
            # Fail loudly instead of crashing on ``None.split``; silently
            # dropping the file could leave a hole inside the requested period.
            raise FileNotFoundError('A file in the dataset has no OPeNDAP URL.')
        first_year, last_year = _parse_file_years(url)
        if last_year < start_year or first_year > end_year:
            # File lies entirely outside the requested period.
            continue
        selected.append(url)
        first_years.append(first_year)
        last_years.append(last_year)

    # Bounds are inclusive: a file set starting exactly in start_year (or
    # ending exactly in end_year) does cover the period.
    fully_covered = (
        bool(selected)
        and min(first_years) <= start_year
        and max(last_years) >= end_year
    )
    return selected, fully_covered


def get_esgf_data(res, var, scen, esm, mem, start_year, end_year, time_chunk=300):
    """Open CMIP6 data for one model/experiment/member lazily over OPeNDAP.

    Each node in ``DATA_NODES`` is queried in turn. For every search hit on a
    node, the files overlapping ``[start_year, end_year]`` are selected and, if
    together they span the whole period, opened with ``xr.open_mfdataset``.
    The first hit that opens successfully is returned.

    Args:
        res: Value for the ``frequency`` search facet (e.g. ``'day'``).
        var: Value for the ``variable`` search facet (e.g. ``'tas'``).
        scen: Value for the ``experiment_id`` search facet.
        esm: Value for the ``source_id`` search facet.
        mem: Value for the ``variant_label`` search facet.
        start_year: First year that must be covered (inclusive).
        end_year: Last year that must be covered (inclusive).
        time_chunk: Chunk size along ``time`` passed to ``xr.open_mfdataset``.

    Returns:
        The object returned by ``xr.open_mfdataset`` for the first usable hit,
        or ``None`` when no node/hit could satisfy the request. Failures are
        reported with ``print`` and never raised to the caller.
    """
    # NOTE(review): presumably silences esgf-pyclient's warning about the
    # default ``facets='*'`` search argument -- confirm against its docs.
    os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = 'True'

    for node in DATA_NODES:
        print(f'\t - Attempting to use data node {node} :')
        try:
            # Establish connection to data node
            conn = SearchConnection(f'https://{node}/esg-search', distrib=True)

            # Initiate query
            ctx = conn.new_context(
                project='CMIP6',
                source_id=esm,
                experiment_id=scen,
                variable=var,
                frequency=res,
                variant_label=mem,
            )

            # Number of search hits (reported to the user as "servers")
            hit_count = ctx.hit_count

            # If no results, raise so the node-level handler reports it
            if hit_count == 0:
                raise FileNotFoundError(
                    f"The specified combination did not return any results on {node} ."
                )

            # Run the search once per node rather than once per hit
            results = ctx.search()

            # Iterate through hits until one can be opened
            for i in range(hit_count):
                print(f'\t \t - {hit_count} servers found. Trying server {i + 1} .')
                try:
                    files = results[i].file_context().search()
                    urls = [f.opendap_url for f in files]
                    selected, fully_covered = _select_files_for_period(
                        urls, start_year, end_year
                    )
                    if not fully_covered:
                        raise FileNotFoundError('Not all data available.')
                    data = xr.open_mfdataset(selected, chunks={'time': time_chunk})
                except Exception as err:
                    # Best effort: report why this hit failed, then try the next
                    print(
                        f'\t \t - Failed using server {i + 1} . '
                        f'({type(err).__name__}: {err})'
                    )
                    continue
                print('\t \t - Success.')
                return data
        except Exception as err:
            # Best effort: report why this node failed, then try the next
            print(f'\t - Failed using node {node} ({type(err).__name__}: {err})')

    print('Could not resolve request.')
    return None
212+
213+
135214def get_recipe_entry_data (row , res = 'day' , variable = 'tas' ):
136215 """
137216 Downloads the data associated to a given entry in the recipe
@@ -145,16 +224,21 @@ def get_recipe_entry_data(row, res='day', variable = 'tas'):
145224 # Message:
146225 print (f'\t - Searching ESGF for archive data { row .archive_start_yr } -{ row .archive_end_yr } to use as target period { row .target_start_yr } -{ row .target_end_yr } ' , flush = True )
147226
148- # Do the ESGF API search
149- result = esgf_search (
150- table_id = res , variable_id = variable , experiment_id = row .archive_experiment ,
151- source_id = row .archive_model , member_id = row .archive_ensemble
152- )
227+ # # Do the ESGF API search
228+ # result = esgf_search(
229+ # table_id=res, variable_id=variable, experiment_id=row.archive_experiment,
230+ # source_id=row.archive_model, member_id=row.archive_ensemble
231+ # )
153232
154- # Format the results
155- result_df = format_esgf_result (result )
233+ # # Format the results
234+ # result_df = format_esgf_result(result)
235+
236+ # # Download the data, ensuring it contains the required period defined by the recipe
237+ # df = get_df_from_esgf(result_df, row.archive_start_yr, row.archive_end_yr)
156238
157- # Download the data, ensuring it contains the required period defined by the recipe
158- df = get_df_from_esgf (result_df , row .archive_start_yr , row .archive_end_yr )
239+ df = get_esgf_data (
240+ res = res , var = variable , scen = row .archive_experiment ,
241+ mem = row .archive_ensemble , esm = row .archive_model ,
242+ start_year = row .archive_start_yr , end_year = row .archive_end_yr )
159243
160244 return df
0 commit comments