1 change: 1 addition & 0 deletions docs/changes/39.feature.md
@@ -0,0 +1 @@
Add CTAO-specific support for telescope indexing/sorting and geomagnetic angle calculation by introducing an observatory configuration, new geomagnetic field presets, and updated sorting behavior (mirror area first, then size).
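The changelog's geomagnetic piece boils down to one public entry point. A minimal sketch of calling it with one of the new presets, as the function appears in this diff (the pointing angles are illustrative values, not from the PR):

```python
import numpy as np

from eventdisplay_ml.geomag import calculate_geomagnetic_angles

# Sample pointing angles in degrees (illustrative only)
azimuth = np.array([0.0, 90.0, 180.0], dtype=np.float32)
elevation = np.array([70.0, 70.0, 70.0], dtype=np.float32)

# Preset names are matched case-insensitively ("ctao-north" also works);
# unknown names raise a KeyError.
theta_b = calculate_geomagnetic_angles(azimuth, elevation, observatory="CTAO-NORTH")
```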
15 changes: 15 additions & 0 deletions src/eventdisplay_ml/config.py
@@ -79,13 +79,21 @@ def configure_training(analysis_type):
help="Maximum number of CPU cores to use for training.",
default=1,
)
parser.add_argument(
"--observatory",
type=str,
help="Observatory/site name for geomagnetic field (default: VERITAS).",
default="VERITAS",
)

model_configs = vars(parser.parse_args())

_logger.info(f"--- XGBoost {analysis_type} training ---")
_logger.info(f"Observatory: {model_configs.get('observatory')}")
_logger.info(f"Telescope multiplicity: {model_configs.get('n_tel')}")
_logger.info(f"Model output prefix: {model_configs.get('model_prefix')}")
_logger.info(f"Train vs test fraction: {model_configs['train_test_fraction']}")
_logger.info(f"Random state: {model_configs['random_state']}")
_logger.info(f"Max events: {model_configs['max_events']}")
_logger.info(f"Max CPU cores: {model_configs['max_cores']}")

@@ -171,10 +179,17 @@ def configure_apply(analysis_type):
help="Maximum number of CPU cores to use for processing.",
default=1,
)
parser.add_argument(
"--observatory",
type=str,
help="Observatory/site name for geomagnetic field (default: VERITAS).",
default="VERITAS",
)

model_configs = vars(parser.parse_args())

_logger.info(f"--- XGBoost {analysis_type} evaluation ---")
_logger.info(f"Observatory: {model_configs.get('observatory')}")
_logger.info(f"Input file: {model_configs.get('input_file')}")
_logger.info(f"Model prefix: {model_configs.get('model_prefix')}")
_logger.info(f"Output file: {model_configs.get('output_file')}")
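Both `configure_training` and `configure_apply` register the flag identically. A self-contained sketch of how the parsed value reaches the field lookup, matching the normalization in `geomag.py` below (the example value is arbitrary):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--observatory",
    type=str,
    help="Observatory/site name for geomagnetic field (default: VERITAS).",
    default="VERITAS",
)
model_configs = vars(parser.parse_args(["--observatory", "ctao-south"]))

# Downstream callers fall back to "veritas" and geomag.py upper-cases the key,
# so "ctao-south", "CTAO-South", and "CTAO-SOUTH" resolve to the same preset.
observatory = model_configs.get("observatory", "veritas").upper()
assert observatory == "CTAO-SOUTH"
```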
61 changes: 41 additions & 20 deletions src/eventdisplay_ml/data_processing.py
@@ -361,7 +361,7 @@ def _clip_size_array(size_array):


def flatten_telescope_data_vectorized(
df, n_tel, features, analysis_type, training=True, tel_config=None
df, n_tel, features, analysis_type, training=True, tel_config=None, observatory="veritas"
):
"""
Vectorized flattening of telescope array columns.
@@ -394,10 +394,9 @@ def flatten_telescope_data_vectorized(
n_evt = len(df)
max_tel_id = tel_config["max_tel_id"] if tel_config else (n_tel - 1)

# Detect indexing mode and determine which index list to use for remapping
has_r_core = _has_field(df, "R_core")
# Index remapping mode for CTAO-style variable-length indexing
index_list_for_remapping = None # None = telescope-ID-indexed (VERITAS R_core mode)
if has_r_core:
if observatory.lower() == "veritas":
_logger.info("Detected VERITAS R_core fixed telescope-ID indexing mode.")
else:
_logger.info("Detected CTAO ImgSel_list variable-length indexing mode.")
@@ -474,7 +473,8 @@ def flatten_telescope_data_vectorized(
index = _get_index(df, n_evt)
df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config)
return pd.concat(
[df_flat, extra_columns(df, analysis_type, training, index, tel_config)], axis=1
[df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)],
axis=1,
)


@@ -515,8 +515,9 @@ def _to_dense_array(col):

def _get_core_arrays(df):
"""Extract core position arrays from DataFrame."""
core_x = _to_numpy_1d(df["Xcore"], np.float32)
core_y = _to_numpy_1d(df["Ycore"], np.float32)
# Make copies to ensure arrays are writable (some sources return read-only views)
core_x = _to_numpy_1d(df["Xcore"], np.float32).copy()
core_y = _to_numpy_1d(df["Ycore"], np.float32).copy()
# Filter out sentinel values and apply physical bounds
# shower cores beyond +-10 km are cut
core_x[(core_x <= -90000) | (np.abs(core_x) > 10000)] = np.nan
@@ -543,20 +544,35 @@ def _compute_size_area_sort_indices(size_data, active_mask, tel_config, max_tel_
if tid <= max_tel_id:
mirror_lookup[int(tid)] = float(area)

# Build dense size matrix aligned by telescope ID
sizes = np.full((n_evt, max_tel_id + 1), np.nan, dtype=np.float32)
col_cap = min(size_data.shape[1], max_tel_id + 1)
sizes[:, :col_cap] = size_data[:, :col_cap]
# size_data is already in tel_id space (shape: n_evt x (max_tel_id + 1))
sizes = size_data

# Mask telescopes that are not active in this event
sizes = np.where(active_mask, sizes, np.nan)
# Sort per-event: primary = mirror area (desc), secondary = size (desc)
# Area has highest priority ALWAYS, even if size is NaN.
# NaN mirror areas go to the very end; within equal area groups,
# valid sizes come before NaN sizes, and then by size descending.
sort_indices = np.zeros((n_evt, max_tel_id + 1), dtype=int)

# Prepare sort keys: primary = mirror area (desc), secondary = size (desc)
area_key = np.where(np.isnan(mirror_lookup), np.inf, -mirror_lookup)
primary_matrix = np.broadcast_to(area_key, (n_evt, max_tel_id + 1))
size_key = np.where(np.isnan(sizes), np.inf, -sizes)
for evt_idx in range(n_evt):
tel_entries = []
for tel_idx in range(max_tel_id + 1):
area = mirror_lookup[tel_idx]
size_val = sizes[evt_idx, tel_idx]
# Build sort key:
# 1) valid area first (0), NaN area last (1)
# 2) area descending via negative value
# 3) within same area: valid size first (0), NaN last (1)
# 4) size descending via negative value
area_valid = 0 if not np.isnan(area) else 1
size_valid = 0 if not np.isnan(size_val) else 1
area_key = -area if area_valid == 0 else 0.0
size_key = -size_val if size_valid == 0 else 0.0
tel_entries.append((tel_idx, area_valid, area_key, size_valid, size_key))

tel_entries.sort(key=lambda x: (x[1], x[2], x[3], x[4]))
sort_indices[evt_idx] = np.array([t[0] for t in tel_entries])

return np.lexsort((size_key, primary_matrix), axis=1)
return sort_indices


def _to_numpy_1d(x, dtype=np.float32):
@@ -590,7 +606,9 @@ def _get_index(df_like, n):
return pd.RangeIndex(n)


def flatten_feature_data(group_df, ntel, analysis_type, training, tel_config=None):
def flatten_feature_data(
group_df, ntel, analysis_type, training, tel_config=None, observatory="veritas"
):
"""
Get flattened features for events.

@@ -616,6 +634,7 @@ def flatten_feature_data(group_df, ntel, analysis_type, training, tel_config=Non
analysis_type=analysis_type,
training=training,
tel_config=tel_config,
observatory=observatory,
)
max_tel_id = tel_config["max_tel_id"] if tel_config else ntel - 1
excluded_columns = set(features_module.target_features(analysis_type)) | set(
@@ -719,6 +738,7 @@ def load_training_data(model_configs, file_list, analysis_type):
analysis_type,
training=True,
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
)
if analysis_type == "stereo_analysis":
df_flat["MCxoff"] = _to_numpy_1d(df["MCxoff"], np.float32)
@@ -936,7 +956,7 @@ def _calculate_array_footprint(tel_config, elev_rad, azim_rad, tel_list_matrix):
return footprints


def extra_columns(df, analysis_type, training, index, tel_config=None):
def extra_columns(df, analysis_type, training, index, tel_config=None, observatory="veritas"):
"""Add extra columns required for analysis type."""
if analysis_type == "stereo_analysis":
n = len(index)
@@ -969,6 +989,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None):
"Geomagnetic_Angle": calculate_geomagnetic_angles(
_to_numpy_1d(df["ArrayPointing_Azimuth"], np.float32),
_to_numpy_1d(df["ArrayPointing_Elevation"], np.float32),
observatory=observatory,
),
}
# Add array footprint if telescope configuration is available
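The per-event loop in `_compute_size_area_sort_indices` replaces the previous `np.lexsort` call. For reference only (not part of this PR), a vectorized sketch that produces the same four-level ordering, since both Python's `sort()` and `np.lexsort` are stable:

```python
import numpy as np

def sort_indices_vectorized(mirror_lookup, sizes):
    """Vectorized equivalent of the per-event sort loop (a sketch).

    mirror_lookup : (n_tel,) mirror areas indexed by telescope ID, NaN if unknown.
    sizes : (n_evt, n_tel) per-event sizes, NaN for inactive telescopes.
    """
    shape = sizes.shape
    area_nan = np.broadcast_to(np.isnan(mirror_lookup), shape)  # NaN areas sort last
    area_key = np.broadcast_to(
        np.where(np.isnan(mirror_lookup), 0.0, -mirror_lookup), shape
    )  # area descending
    size_nan = np.isnan(sizes)  # NaN sizes last within an area group
    size_key = np.where(size_nan, 0.0, -sizes)  # size descending
    # np.lexsort treats the LAST key as primary, so pass keys in reverse priority
    return np.lexsort((size_key, size_nan, area_key, area_nan), axis=1)
```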
2 changes: 2 additions & 0 deletions src/eventdisplay_ml/features.py
@@ -115,6 +115,7 @@ def _regression_features(training):
*telescope_features("stereo_analysis"),
"DispNImages",
"DispTelList_T",
"ImgSel_list",
"Xoff",
"Yoff",
"Xoff_intersect",
@@ -138,6 +139,7 @@ def _classification_features():
var_array = [
"DispNImages",
"DispTelList_T",
"ImgSel_list",
"EChi2S",
"EmissionHeight",
"EmissionHeightChi2",
30 changes: 21 additions & 9 deletions src/eventdisplay_ml/geomag.py
@@ -10,11 +10,21 @@
"BX": 25.2e-6, # Tesla
"BY": 0.0, # Tesla
"BZ": 40.88e-6, # Tesla
}
},
"CTAO-NORTH": {
"BX": 30.909e-6, # Tesla
"BY": 0.0, # Tesla
"BZ": 23.409e-6, # Tesla
},
"CTAO-SOUTH": {
"BX": 20.552e-6, # Tesla
"BY": 0.0, # Tesla
"BZ": -9.367 - 6, # Tesla
},
}


def calculate_geomagnetic_angles(azimuth, elevation, site="VERITAS"):
def calculate_geomagnetic_angles(azimuth, elevation, observatory="VERITAS"):
"""
Calculate the angle between the shower direction and the geomagnetic field.

@@ -24,21 +34,23 @@ def calculate_geomagnetic_angles(azimuth, elevation, site="VERITAS"):
Azimuth angles of the showers in degrees.
elevation : array-like
Elevation angles of the showers in degrees.
site : str
Site identifier to get geomagnetic field components.
observatory : str
Observatory identifier to get geomagnetic field components.

Returns
-------
theta_B : array-like
Angle between shower direction and geomagnetic field in degrees.
"""
observatory = observatory.upper()
try:
bx = FIELD_COMPONENTS[site]["BX"]
by = FIELD_COMPONENTS[site]["BY"]
bz = FIELD_COMPONENTS[site]["BZ"]
bx = FIELD_COMPONENTS[observatory]["BX"]
by = FIELD_COMPONENTS[observatory]["BY"]
bz = FIELD_COMPONENTS[observatory]["BZ"]
except KeyError as exc:
raise KeyError(f"Geomagnetic field components for site '{site}' are not defined.") from exc

raise KeyError(
f"Geomagnetic field components for observatory '{observatory}' are not defined."
) from exc
# Shower direction unit vector
sx = np.cos(np.radians(elevation)) * np.cos(np.radians(azimuth)) # North
sy = np.cos(np.radians(elevation)) * np.sin(np.radians(azimuth)) # East
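The diff truncates before the angle itself is computed. A hedged completion consistent with the commented North/East axes and the sign of the `BZ` presets, assuming a north-east-down convention (the hidden lines may differ):

```python
import numpy as np

def _theta_b_sketch(azimuth, elevation, bx, by, bz):
    # Shower direction unit vector (degrees in, NED axes assumed)
    sx = np.cos(np.radians(elevation)) * np.cos(np.radians(azimuth))  # North
    sy = np.cos(np.radians(elevation)) * np.sin(np.radians(azimuth))  # East
    sz = -np.sin(np.radians(elevation))  # Down; sign convention is an assumption
    # Angle between the unit shower direction and the field vector
    cos_theta = (sx * bx + sy * by + sz * bz) / np.sqrt(bx**2 + by**2 + bz**2)
    return np.degrees(np.arccos(np.clip(cos_theta, -1.0, 1.0)))
```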
4 changes: 2 additions & 2 deletions src/eventdisplay_ml/hyper_parameters.py
@@ -88,8 +88,8 @@ def _load_hyper_parameters_from_file(config_file):


def pre_cuts_regression(n_tel):
"""Get pre-cuts for regression analysis (no multiplicity filter)."""
event_cut = ""
"""Get pre-cuts for regression analysis."""
event_cut = "DispNImages >=2"
if PRE_CUTS_REGRESSION:
event_cut = " & ".join(f"({c})" for c in PRE_CUTS_REGRESSION)
_logger.info(f"Pre-cuts (regression): {event_cut if event_cut else 'None'}")
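Note how the new default interacts with the module-level cut list: when `PRE_CUTS_REGRESSION` is non-empty, the joined cuts replace the `DispNImages >=2` default rather than extend it. A sketch with hypothetical cut entries:

```python
# Hypothetical entries; the real PRE_CUTS_REGRESSION lives in hyper_parameters.py
PRE_CUTS_REGRESSION = ["size_1 > 0", "EmissionHeight > 0"]

event_cut = "DispNImages >=2"  # new default from this PR
if PRE_CUTS_REGRESSION:
    # Joined cuts overwrite the default entirely
    event_cut = " & ".join(f"({c})" for c in PRE_CUTS_REGRESSION)

print(event_cut)  # (size_1 > 0) & (EmissionHeight > 0)
```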
14 changes: 12 additions & 2 deletions src/eventdisplay_ml/models.py
@@ -246,7 +246,12 @@ def apply_regression_models(df, model_configs):
n_tel = tel_config["max_tel_id"] + 1 if tel_config else 4

flatten_data = flatten_feature_data(
df, n_tel, analysis_type="stereo_analysis", training=False, tel_config=tel_config
df,
n_tel,
analysis_type="stereo_analysis",
training=False,
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
)

models = model_configs["models"]
@@ -302,7 +307,12 @@ def apply_classification_models(df, model_configs, threshold_keys):
_logger.info(f"Processing {len(group_df)} events with bin={e_bin}")

flatten_data = flatten_feature_data(
group_df, n_tel, analysis_type="classification", training=False, tel_config=tel_config
group_df,
n_tel,
analysis_type="classification",
training=False,
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
)
model = models[e_bin]["model"]
flatten_data = flatten_data.reindex(columns=models[e_bin]["features"])
7 changes: 4 additions & 3 deletions tests/test_imgsel_debug.py
@@ -65,11 +65,12 @@ def test_imgsel_sorting_and_alignment():
analysis_type="stereo_analysis",
training=True,
tel_config=tel_config,
observatory="ctao-north",
)

# Expected order by size desc (mirror areas equal): tel 1 then tel 3
expected_size_log = [np.log10(3725.51), np.log10(2640.01)]
# Disp_T should align with sorted order
# Expected order by size desc (mirror areas equal): tel 1 (3725.51) then tel 3 (2640.01)
expected_size_log = [np.log10(3725.51), np.log10(2640.01)] # Descending order
# Disp_T aligned to sorted order (tel 1 first, then tel 3)
expected_disp = [1.56872, 1.50883]

# Assert mirror_area columns exist and equal 100 for sorted positions
53 changes: 53 additions & 0 deletions tests/test_sort_logic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Unit tests for telescope sorting logic (mirror area first, then size)."""

import numpy as np


def test_sort_logic_equal_area_by_size():
"""Test sorting with equal mirror areas: secondary sort by size descending."""
max_tel_id = 3
mirror_lookup = np.array([np.nan, 100.0, np.nan, 100.0], dtype=np.float32)
sizes_row = np.array([np.nan, 3725.51, np.nan, 2640.01], dtype=np.float32)

# Apply the new sort logic: area priority, then size within equal area
tel_entries = []
for tel_idx in range(max_tel_id + 1):
area = mirror_lookup[tel_idx]
size_val = sizes_row[tel_idx]
area_valid = 0 if not np.isnan(area) else 1
size_valid = 0 if not np.isnan(size_val) else 1
area_key = -area if area_valid == 0 else 0.0
size_key = -size_val if size_valid == 0 else 0.0
tel_entries.append((tel_idx, area_valid, area_key, size_valid, size_key))

tel_entries.sort(key=lambda x: (x[1], x[2], x[3], x[4]))
sort_indices = np.array([t[0] for t in tel_entries])

# Expect: tel 1 (area 100, size 3725.51), tel 3 (area 100, size 2640.01), then NaN areas
assert np.array_equal(sort_indices, np.array([1, 3, 0, 2]))


def test_sort_logic_mixed_areas_and_nan_sizes():
"""Test sorting with mixed mirror areas and some NaN sizes: area priority always."""
max_tel_id = 4
# Areas: tel 0=200, tel 1=100 (NaN size), tel 2=200, tel 3=100, tel 4=NaN
mirror_lookup = np.array([200.0, 100.0, 200.0, 100.0, np.nan], dtype=np.float32)
# Sizes: tel 0=1000, tel 1=NaN, tel 2=500, tel 3=2000, tel 4=3000
sizes_row = np.array([1000.0, np.nan, 500.0, 2000.0, 3000.0], dtype=np.float32)

# Apply sorting
tel_entries = []
for tel_idx in range(max_tel_id + 1):
area = mirror_lookup[tel_idx]
size_val = sizes_row[tel_idx]
area_valid = 0 if not np.isnan(area) else 1
size_valid = 0 if not np.isnan(size_val) else 1
area_key = -area if area_valid == 0 else 0.0
size_key = -size_val if size_valid == 0 else 0.0
tel_entries.append((tel_idx, area_valid, area_key, size_valid, size_key))

tel_entries.sort(key=lambda x: (x[1], x[2], x[3], x[4]))
sort_indices = np.array([t[0] for t in tel_entries])

# Expect: area 200 desc (tel 0 size 1000, tel 2 size 500), then area 100 desc (tel 3 size 2000, tel 1 size NaN), then NaN area (tel 4)
assert np.array_equal(sort_indices, np.array([0, 2, 3, 1, 4]))