diff --git a/input_processing/recf.py b/input_processing/recf.py index fb736ce2..fc16d170 100644 --- a/input_processing/recf.py +++ b/input_processing/recf.py @@ -369,9 +369,6 @@ def main(reeds_path, inputs_case): resource_adequacy_years ) df_windofs.columns = ['wind-ofs_' + col for col in df_windofs] - else: - # Keep concat inputs consistent when offshore wind is disabled. - df_windofs = pd.DataFrame(index=df_windons.index) ### UPV df_upv = calculate_class_region_cf_hourly( diff --git a/postprocessing/bokehpivot/core.py b/postprocessing/bokehpivot/core.py index c11577fc..2a030342 100644 --- a/postprocessing/bokehpivot/core.py +++ b/postprocessing/bokehpivot/core.py @@ -27,7 +27,7 @@ import bokeh.resources as br import bokeh.embed as be import datetime -import urllib.parse as urlp +import six.moves.urllib.parse as urlp import subprocess as sp import jinja2 as ji import reeds_bokeh as rb @@ -612,36 +612,15 @@ def vizit_report(data_type, data_source, vizit_data, output_dir, auto_open): vizit_commit = '7011d363e40386264bedb3155629729b225fd22e' vizit_url = f'https://raw.githubusercontent.com/mmowers/vizit/{vizit_commit}/index.html' f_out_str = requests.get(vizit_url).text - # Vizit report data can contain NumPy scalars/arrays from pandas outputs; - # normalize them before JSON encoding so report generation does not fail. - data_str = json.dumps(data_dict, separators=(',',':'), default=json_scalar) - config_str = json.dumps(vizit_config, separators=(',',':'), default=json_scalar) - # Use callable replacements so backslashes in generated JSON are treated as - # literal text rather than regex replacement escapes. - f_out_str = re.sub( - 'let config_load = .*;\n', - lambda _: f'let config_load = {config_str};\n', - f_out_str, - 1, - ) - f_out_str = re.sub( - 'let rawData = .*;\n', - lambda _: f'let rawData = {data_str};\n', - f_out_str, - 1, - ) + data_str = json.dumps(data_dict, separators=(',',':')) + config_str = json.dumps(vizit_config, separators=(',',':')) + f_out_str = re.sub('let config_load = .*;\n', f'let config_load = {config_str};\n', f_out_str, 1) + f_out_str = re.sub('let rawData = .*;\n', f'let rawData = {data_str};\n', f_out_str, 1) with open(f'{output_dir}report_vizit.html', 'w') as f_out: f_out.write(f_out_str) if auto_open == 'Yes': sp.Popen(os.path.abspath(f'{output_dir}report_vizit.html'), shell=True) -def json_scalar(obj): - if isinstance(obj, np.generic): - return obj.item() - if isinstance(obj, np.ndarray): - return obj.tolist() - raise TypeError(f'Object of type {obj.__class__.__name__} is not JSON serializable') - def preset_wdg(preset, download_full_source=False): ''' Reset widgets and then set them to that specified in input preset diff --git a/postprocessing/bokehpivot/reeds2.py b/postprocessing/bokehpivot/reeds2.py index ea9a46a8..7d8d0da4 100644 --- a/postprocessing/bokehpivot/reeds2.py +++ b/postprocessing/bokehpivot/reeds2.py @@ -1111,49 +1111,8 @@ def net_co2(dfs, **kw): co2['Cumulative CO2e (MMton)'] /= 1e6 return co2 -# health_damages_caused_r.csv is now written with the current 9-column schema -# from postprocessing/air_quality, while these report paths still use the older -# bokehpivot display names downstream. Keep both lists explicit at the boundary. -HEALTH_DAMAGE_SOURCE_COLUMNS = [ - 'rb', 'year', 'e', 'Emissions (thousand metric tons)', - 'model', 'cr', 'Marginal damage ($/metric ton)', - 'Health damages (billion $)', 'Health damages (lives)' -] -HEALTH_DAMAGE_LEGACY_COLUMNS = [ - 'rb', 'st', 'year', 'e', 'Emissions (thousand metric tons)', - 'model', 'cr', 'Marginal damage ($/metric ton)', - 'Health damages (billion $)', 'Health damages (lives)' -] - -def normalize_health_damage_schema(df): - # Accept either current generator headers or legacy report headers, then - # return the legacy names expected by the existing report calculations. - df = df.copy() - df.rename(columns={ - 'ba': 'rb', - 'pollutant': 'e', - 'tons': 'Emissions (thousand metric tons)', - 'md': 'Marginal damage ($/metric ton)', - 'damage_$': 'Health damages (billion $)', - 'mortality': 'Health damages (lives)', - }, inplace=True) - - if 'st' not in df.columns: - # Current health-damage outputs do not include state. Keep a placeholder - # for older bokehpivot paths; derive this from hierarchy if it is used. - df['st'] = df['rb'] if 'rb' in df.columns else '' - - required = HEALTH_DAMAGE_SOURCE_COLUMNS + ['st'] - missing = [col for col in required if col not in df.columns] - if missing: - raise KeyError('health_damages_caused_r.csv missing columns: {}'.format(missing)) - - return df[required] - # function to process health damage estimates def process_health_damage(df, **kw): - df = normalize_health_damage_schema(df) - # convert to billion $ and inflate series df['Health damages (billion $)'] = inflate_series(df['Health damages (billion $)']) * 1e-9 @@ -1176,27 +1135,24 @@ def process_health_damage(df, **kw): rows = product(*allRows.values()) df_all = pd.DataFrame.from_records(rows, columns=allRows.keys()) df_new = df.merge(df_all, how='outer', on=['model', 'cr', 'e', 'rb', 'year']) - df_new['st'] = df_new['st'].ffill() + df_new['st'] = df_new['st'].interpolate(method="ffill") # sort by category and year df_new = df_new.sort_values(['model', 'cr', 'e', 'rb', 'year']) # interpolate any missing values - df_new = df_new.groupby( - ['model', 'cr', 'e', 'rb'], group_keys=False - ).apply(lambda group: group.interpolate(method='ffill')) + df_new = df_new.groupby(['model', 'cr', 'e', 'rb']).apply(lambda group: group.interpolate(method='ffill')) # sum over rb - df_out = df_new.groupby(['model', 'cr', 'e', 'year'])[[ - 'Emissions (thousand metric tons)', 'Health damages (billion $)', - 'Health damages (lives)', 'Discounted health damages (billion $)' - ]].sum().reset_index() + df_out = df_new.groupby(['model', 'cr', 'e', 'year'])[ + 'Emissions (thousand metric tons)', 'Health damages (billion $)', + 'Health damages (lives)', 'Discounted health damages (billion $)' + ].sum().reset_index() # also sum over pollutant - df_poll_agg = df_new.groupby(['model', 'cr', 'year'])[[ - 'Health damages (billion $)', 'Health damages (lives)', - 'Discounted health damages (billion $)' - ]].sum().reset_index() + df_poll_agg = df_new.groupby(['model', 'cr', 'year'])[ + 'Health damages (billion $)', 'Health damages (lives)', 'Discounted health damages (billion $)' + ].sum().reset_index() df_poll_agg.rename(columns={'Health damages (billion $)' : 'Total health damages (billion $)', 'Health damages (lives)' : 'Total health damages (lives)', @@ -1218,9 +1174,7 @@ def process_social_costs(dfs, **kw): system_costs_agg.rename(columns={'Cost (Bil $)' : 'Cost (Bil $)-system', 'Discounted Cost (Bil $)' : 'Discounted Cost (Bil $)-system'}, inplace=True) - health_costs_agg = health_costs.groupby(['year', 'model', 'cr'])[[ - 'Health damages (billion $)', 'Discounted health damages (billion $)' - ]].sum().reset_index() + health_costs_agg = health_costs.groupby(['year', 'model', 'cr'])['Health damages (billion $)', 'Discounted health damages (billion $)'].sum().reset_index() health_costs_agg.rename(columns={'Health damages (billion $)' : 'Cost (Bil $)-health', 'Discounted health damages (billion $)' : 'Discounted Cost (Bil $)-health'}, inplace=True) @@ -2113,10 +2067,7 @@ def pre_spur(dfs, **kw): ('Health Damages from Emissions', {'file':'health_damages_caused_r.csv', - 'columns': HEALTH_DAMAGE_LEGACY_COLUMNS, - # Preserve the CSV's real headers when the current 9-column output is - # present; process_health_damage normalizes the schema before plotting. - 'allow_mismatched_columns': True, + 'columns': ['rb', 'st', 'year', 'e', 'Emissions (thousand metric tons)', 'model', 'cr', 'Marginal damage ($/metric ton)', 'Health damages (billion $)', 'Health damages (lives)'], 'preprocess': [ {'func': process_health_damage, 'args':{}}, ], @@ -2139,9 +2090,7 @@ def pre_spur(dfs, **kw): {'name': 'df_capex_init', 'file': '../inputs_case/df_capex_init.csv'}, {'name': 'sw', 'file': '../inputs_case/switches.csv', 'header':None, 'columns': ['switch', 'value']}, {'name': 'scalars', 'file': '../inputs_case/scalars.csv', 'header':None, 'columns': ['scalar', 'value', 'comment']}, - # See the Health Damages source above: keep current headers intact - # and normalize them inside process_social_costs/process_health_damage. - {'name': 'health_damages', 'file': 'health_damages_caused_r.csv', 'columns': HEALTH_DAMAGE_LEGACY_COLUMNS, 'allow_mismatched_columns': True}, + {'name': 'health_damages', 'file': 'health_damages_caused_r.csv', 'columns': ['rb', 'st', 'year', 'e', 'Emissions (thousand metric tons)', 'model', 'cr', 'Marginal damage ($/metric ton)', 'Health damages (billion $)', 'Health damages (lives)']}, ], 'preprocess': [ {'func': process_social_costs, 'args': {}}, diff --git a/postprocessing/bokehpivot/reeds_bokeh.py b/postprocessing/bokehpivot/reeds_bokeh.py index c4793f34..105128d2 100644 --- a/postprocessing/bokehpivot/reeds_bokeh.py +++ b/postprocessing/bokehpivot/reeds_bokeh.py @@ -359,15 +359,7 @@ def get_src(scen, src): if 'transpose' in src and src['transpose'] is True: df_src = df_src.T if 'columns' in src: - if len(df_src.columns) == len(src['columns']): - df_src.columns = src['columns'] - elif not src.get('allow_mismatched_columns', False): - df_src.columns = src['columns'] - else: - # Some newer outputs are normalized in their preprocess functions. - # For those opt-in sources, preserve CSV headers when legacy report - # columns no longer match the generated file width. - pass + df_src.columns = src['columns'] df_src.replace('Eps',0, inplace=True) df_src.replace('Undf',0, inplace=True) df_src = df_src.apply(pd.to_numeric, errors='ignore') @@ -627,4 +619,4 @@ def df_to_lowercase(df): for col in df: if df[col].dtype == object: df[col] = df[col].str.lower() - return df + return df \ No newline at end of file