diff --git a/src/workflow/1_Experiment/0_experiment_manager.py b/src/workflow/1_Experiment/0_experiment_manager.py index 54481a5..86f9196 100644 --- a/src/workflow/1_Experiment/0_experiment_manager.py +++ b/src/workflow/1_Experiment/0_experiment_manager.py @@ -13,7 +13,7 @@ from copy import deepcopy import random import re, linecache, gc, csv, scipy, shutil -from pyDOE import * # SOURCE: https://pypi.org/project/lhsmdu/. https://pythonhosted.org/pyDOE/randomized.html#latin-hypercube +from pydoe import * # SOURCE: https://pypi.org/project/lhsmdu/. https://pythonhosted.org/pyDOE/randomized.html#latin-hypercube import pyarrow import pickle @@ -743,7 +743,7 @@ def main_executer(n1, Executed_Scenario, time_vector, scenario_list,solver,osemo print('!!! At execution, we skip: future ', str_fut, ' and scenario ', str_scen, ' !!!' ) # # -def function_C_mathprog_parallel( fut_index, scen, inherited_scenarios, unpackaged_useful_elements, num_time_slices_SDP): +def function_C_mathprog_parallel( fut_index, scen, inherited_scenarios, unpackaged_useful_elements, num_time_slices_SDP, region): # scenario_list = unpackaged_useful_elements[0] S_DICT_sets_structure = unpackaged_useful_elements[1] @@ -1110,7 +1110,7 @@ def function_C_mathprog_parallel( fut_index, scen, inherited_scenarios, unpackag # # Pre-process the data file to add commodity-technology-mode sets and CRF/PvAnnuity from preprocess_data import main as preprocess_main - preprocess_main(g_path, g_path) + preprocess_main(g_path, g_path, region) # ########################################################################################################################### # Furthermore, we must print the inputs separately for fast deployment of the input matrix: @@ -2944,7 +2944,7 @@ def find_param_indices(parameter, scenario_data, set_values): if scenario_list_print[fut_id_new] in scenario_list: # synthesized_all_data_row = function_C_mathprog_parallel(n2, inherited_scenarios, packaged_useful_elements, num_time_slices_SDP) # sys.exit(6) - p = mp.Process(target=function_C_mathprog_parallel, args=(n2, fut_id_new, inherited_scenarios, packaged_useful_elements, num_time_slices_SDP)) + p = mp.Process(target=function_C_mathprog_parallel, args=(n2, fut_id_new, inherited_scenarios, packaged_useful_elements, num_time_slices_SDP, region)) processes.append(p) p.start() else: @@ -2966,7 +2966,7 @@ def find_param_indices(parameter, scenario_data, set_values): for fut_id_new in range(len(scenario_list_print)): x = len(all_futures)# * len(scenario_list_print) for n in range(x): - function_C_mathprog_parallel(n, fut_id_new, inherited_scenarios, packaged_useful_elements, num_time_slices_SDP) + function_C_mathprog_parallel(n, fut_id_new, inherited_scenarios, packaged_useful_elements, num_time_slices_SDP, region) ''' diff --git a/src/workflow/1_Experiment/preprocess_data.py b/src/workflow/1_Experiment/preprocess_data.py index 9452e1f..6e9d269 100644 --- a/src/workflow/1_Experiment/preprocess_data.py +++ b/src/workflow/1_Experiment/preprocess_data.py @@ -35,7 +35,7 @@ Usage:: - python preprocess_data_muio.py + python preprocess_data_muio.py """ @@ -56,7 +56,7 @@ def parse_set_line(line): return [] -def main(data_infile, data_outfile): +def main(data_infile, data_outfile, region): # ========================================================================= # STEP 1 - Read lines, filtering out any previous preprocessing output @@ -340,14 +340,14 @@ def main(data_infile, data_outfile): ) lines.append("{}:=\n".format(techs_string)) rt = " ".join(str(CapitalRecoveryFactor[t]) for t in tech_list) - lines.append("RE1 {} \n".format(rt)) + lines.append("{} {} \n".format(region, rt)) lines.append(";\n") # Append PvAnnuity param to lines lines.append("param PvAnnuity default 0 : \n") lines.append("{}:=\n".format(techs_string)) rt = " ".join(str(PvAnnuity[t]) for t in tech_list) - lines.append("RE1 {} \n".format(rt)) + lines.append("{} {} \n".format(region, rt)) lines.append(";\n") # ========================================================================= diff --git a/src/workflow/4_PRIM/PRIM_t3f2.yaml b/src/workflow/4_PRIM/PRIM_t3f2.yaml index 1f69aad..cc1547d 100644 --- a/src/workflow/4_PRIM/PRIM_t3f2.yaml +++ b/src/workflow/4_PRIM/PRIM_t3f2.yaml @@ -176,4 +176,10 @@ BAU: 'Scenario1' #----------------------------------------------------------------------------# max_per_batch: 1 #----------------------------------------------------------------------------# + +# Which direction is desirable for each outcome +outcome_directions: + Total System Cost: low + Hydro Generation: high + Electricity Demand: low ######################################################################################## \ No newline at end of file diff --git a/src/workflow/4_PRIM/t3b_sdiscovery/t3f3_prim_manager.py b/src/workflow/4_PRIM/t3b_sdiscovery/t3f3_prim_manager.py index 6d33a91..c7d13ed 100644 --- a/src/workflow/4_PRIM/t3b_sdiscovery/t3f3_prim_manager.py +++ b/src/workflow/4_PRIM/t3b_sdiscovery/t3f3_prim_manager.py @@ -895,7 +895,8 @@ def f2_ipp(lvl, lvl_max, b, o, o_name, per_name, rep_csv, ad_df_grab, add_str = '4. Selecting the data to create tables' print(add_str) pmrep.write(add_str + '\n') - use_pfd = comp_pfd[exp_ID]['Scenario1'] + available_scenarios = list(comp_pfd[exp_ID].keys()) + use_pfd = comp_pfd[exp_ID][available_scenarios[0]] future_list = list(use_pfd.keys()) future_list.sort() @@ -1480,12 +1481,23 @@ def f2_ipp(lvl, lvl_max, b, o, o_name, per_name, rep_csv, ad_df_grab, # Creating the data list for drivers: if fut == 0: for dc in d_data_local_cols: - dict_large_table.update({dc: []}) + # Avoid overwriting an outcome column that + # shares its name with a driver column: + if dc not in o_data_local_cols: + dict_large_table.update({dc: []}) # Storing the data values of drivers: for a_driver in list(d_data.keys()): for dc in d_data_lc_dict[a_driver]: + # Avoid double-counting when an outcome + # column is reused as a driver column with + # the same name (e.g. outcome-as-driver + # via o_id_as_d in the PRIM structure). + # Outcome values were already appended + # in the outcome loop above. + if dc in o_data_local_cols: + continue if inconsistency_exists_glob is False: dict_large_table[dc] += \ @@ -1643,7 +1655,7 @@ def f2_ipp(lvl, lvl_max, b, o, o_name, per_name, rep_csv, ad_df_grab, o_d_col_fam_dict = subtbl_col_cntrl[b][o]['o_d_cfd'] o_d_col_fam_dict_names = subtbl_col_cntrl[b][o]['o_d_cfdn'] - per_list = period_list + per_list = list(subtbl[b][o].keys()) for py in range(len(per_list)): per_name = per_list[py] for fam in range(len(o_d_col_fam_list)): @@ -1661,6 +1673,7 @@ def f2_ipp(lvl, lvl_max, b, o, o_name, per_name, rep_csv, ad_df_grab, this_df_raw = subtbl[b][o][per_name][this_fam_key] this_df_raw = this_df_raw.fillna(0) + this_df_raw = this_df_raw.loc[:, ~this_df_raw.columns.duplicated()] # Filter the normalized dataframe: unique_strats = list(set(this_df_raw['Strat_ID'])) diff --git a/src/workflow/4_PRIM/t3b_sdiscovery/t3f4_range_finder_mapping.py b/src/workflow/4_PRIM/t3b_sdiscovery/t3f4_range_finder_mapping.py index d505756..cbfcb0d 100644 --- a/src/workflow/4_PRIM/t3b_sdiscovery/t3f4_range_finder_mapping.py +++ b/src/workflow/4_PRIM/t3b_sdiscovery/t3f4_range_finder_mapping.py @@ -11,6 +11,7 @@ import pandas as pd import sys +import yaml from copy import deepcopy import xlsxwriter @@ -85,18 +86,11 @@ # that tells the PRIM story # Define the separation between desirable and risk outcomes: -desirable_outcomes = {\ - 'Costs':'low', - 'Emissions':'low', - 'CAPEX':'low', - 'Bus Price':'low', - 'Electricity price':'low'} -risk_outcomes = {\ - 'Costs':'high', - 'Emissions':'high', - 'CAPEX':'high', - 'Bus Price':'high', - 'Electricity price':'high'} +with open('../PRIM_t3f2.yaml', 'r') as f: + prim_cfg = yaml.safe_load(f) +desirable_outcomes = prim_cfg['outcome_directions'] +risk_outcomes = {k: ('high' if v == 'low' else 'low') + for k, v in desirable_outcomes.items()} # create dictionaries apt for periods (2 for this analysis): desi_u_data = {} diff --git a/src/workflow/4_PRIM/t3f2_prim_files_creator.py b/src/workflow/4_PRIM/t3f2_prim_files_creator.py index 75ff664..ddbcc3b 100644 --- a/src/workflow/4_PRIM/t3f2_prim_files_creator.py +++ b/src/workflow/4_PRIM/t3f2_prim_files_creator.py @@ -130,7 +130,10 @@ def f1_create_prim_files(dir_elements, dirl, scen, dict_pfcp, analysis_list, ext = afile_path.suffix.lower() if ext == '.csv': - a_df = pd.read_csv(afile_path, dtype=use_dtype_inp,low_memory=False) + with open(afile_path, 'r') as fh: + head = fh.readline() + sep = ';' if head.count(';') > head.count(',') else ',' + a_df = pd.read_csv(afile_path, dtype=use_dtype_inp, sep=sep, low_memory=False) elif ext == '.parquet': a_df = pd.read_parquet(afile_path) else: @@ -166,7 +169,10 @@ def f1_create_prim_files(dir_elements, dirl, scen, dict_pfcp, analysis_list, ext = file_path.suffix.lower() if ext == '.csv': - df = pd.read_csv(file_path, dtype=use_dtype_out) + with open(file_path, 'r') as fh: + head = fh.readline() + sep = ';' if head.count(';') > head.count(',') else ',' + df = pd.read_csv(file_path, dtype=use_dtype_out, sep=sep) elif ext == '.parquet': df = pd.read_parquet(file_path) else: diff --git a/src/workflow/z_auxiliar_code.py b/src/workflow/z_auxiliar_code.py index cd29d2f..03f8f88 100644 --- a/src/workflow/z_auxiliar_code.py +++ b/src/workflow/z_auxiliar_code.py @@ -4408,13 +4408,20 @@ def data_processor_new(output_file, model_structure, strategy, fut_id, solver, p elif output_file_type == 'parquet': # Ensure consistent column types for col in df_output_sol.columns: - if df_output_sol[col].dtype == "object": # Check for object (potential mixed types) + dtype_str = str(df_output_sol[col].dtype) + is_stringy = pd.api.types.is_string_dtype(df_output_sol[col].dtype) + if is_stringy: try: - # Attempt to convert to numeric - df_output_sol[col] = pd.to_numeric(df_output_sol[col]) - except ValueError: - # If conversion fails, leave as string - # print(f"Column '{col}' contains non-numeric values. Converting to string.") + numeric_col = pd.to_numeric(df_output_sol[col], errors='coerce') + except Exception: + df_output_sol[col] = df_output_sol[col].astype(str) + continue + if numeric_col.notna().any(): + if col == 'YEAR': + df_output_sol[col] = numeric_col.fillna(0).astype(int) + else: + df_output_sol[col] = numeric_col + else: df_output_sol[col] = df_output_sol[col].astype(str) # Change the output name for the Parquet file