42 changes: 42 additions & 0 deletions docs/converters/vision_converter.md
@@ -52,6 +52,48 @@ Vision introduced UUID based identifier system since version 9.7. It is implemen

An example usage can be found in the example notebook as well as in the test cases.

## Optional extra columns

When working with Vision Excel exports, some metadata columns (like `GUID` or `StationID`) may not always be present, especially in partial exports. The `optional_extra` feature lets you list columns that are included in `extra_info` when present, without causing the conversion to fail when they are missing.

**Syntax:**
```yaml
grid:
  Transformers:
    transformer:
      id:
        auto_id:
          key: Number
      # ... other fields ...
      extra:
        - ID              # Required - fails if missing
        - Name            # Required - fails if missing
        - optional_extra:
          - GUID          # Optional - skipped if missing
          - StationID     # Optional - skipped if missing
```

Comment on lines +68 to +71 (Member):

what happens if an element is specified in both the required and optional?

```yaml
      extra:
        - ID            # Required - fails if missing
        - optional_extra:
          - ID          # Optional - skipped if missing
```

I believe that the default should be that required precedes optional, so maybe we need to add an explicit test case for this.
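
A minimal sketch of what such a test could look like, assuming `TabularConverter` can be constructed without a mapping file and that `_normalize_extra_col_def` keeps the signature from this diff:

```python
from power_grid_model_io.converters.tabular_converter import TabularConverter


def test_required_precedes_optional_extra():
    converter = TabularConverter()  # assumes the converter can be built without a mapping file
    col_def = ["ID", {"optional_extra": ["ID", "GUID"]}]

    normalized = converter._normalize_extra_col_def(col_def)

    # the required "ID" entry stays; the duplicate is dropped from optional_extra
    assert normalized == ["ID", {"optional_extra": ["GUID"]}]
```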


Member:

What was the behaviour previously when specifying the following:

```yaml
extra:
  - ID
  - ID
```

Did it append a separate column, or did it combine the two?

Member Author:

Only one would appear. In this case, the second ID won't do anything since it already exists.

Member:

In that case, it may be possible to achieve the same result (namely that the required column is always leading) with less new code. Note that this does mean that the optional_extra needs to be parsed after the regular extra entries to ensure this works:

```yaml
extra:
  - optional_extra:
    - ID
  - ID
```

in addition, the following two should also be equivalent:

```yaml
extra:
  - optional_extra:
    - ID
  - optional_extra:
    - GUID
```

```yaml
extra:
  - optional_extra:
    - ID
    - GUID
```


**Behavior:**
- Required columns (listed directly under `extra`) raise a `KeyError` if they are missing
- Optional columns (nested under `optional_extra`) are silently skipped if not found
- If some optional columns are present and others are missing, only the present ones are included in `extra_info` (see the sketch below)
- This feature is particularly useful for handling different Vision export configurations or versions
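
For illustration, with the mapping above and made-up values, the `extra_info` entry for a single converted node might look roughly like this when `GUID` is present in the export but `StationID` is not:

```python
# Hypothetical extra_info entry for one node; the pgm id 1 and all values are made up.
extra_info = {
    1: {
        "ID": 12,                   # required column, always present
        "Name": "Main substation",  # required column, always present
        "GUID": "{9F5B7C2A-1C3D-4E5F-8A9B-0C1D2E3F4A5B}",  # optional column that happened to exist
        # no "StationID" key: that optional column was missing and silently skipped
    },
}
```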

**Duplicate handling:**
When a column appears in both the regular `extra` list and within `optional_extra`, the regular `extra` entry takes precedence and duplicates are automatically eliminated from `optional_extra`:

```yaml
extra:
  - ID                # Regular column - always processed
  - Name              # Regular column - always processed
  - optional_extra:
    - ID              # Duplicate - automatically removed
    - GUID            # Unique optional - processed if present
    - StationID       # Unique optional - processed if present
```

In this example, `ID` will only be processed once (from the regular `extra` list), while `GUID` and `StationID` are processed as optional columns. This prevents duplicate data in the resulting `extra_info` and ensures consistent behavior regardless of column ordering.
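
As a rough usage sketch: the file names below are placeholders, and the `set_mapping_file` call is an assumption about the converter API rather than something introduced by this change; adjust to whatever your installed version exposes.

```python
from pathlib import Path

from power_grid_model_io.converters import VisionExcelConverter

# Placeholder paths; substitute your own export and mapping file.
source_file = Path("my_vision_export.xlsx")
mapping_file = Path("my_mapping_with_optional_extra.yaml")

converter = VisionExcelConverter(source_file=source_file)
converter.set_mapping_file(mapping_file)  # assumed API for swapping in a custom mapping

input_data, extra_info = converter.load_input_data()
# Optional columns that were present in the export now show up in extra_info;
# missing ones are skipped without raising a KeyError.
```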

## Common/Known issues related to Vision
So far, we have the following known issues related to Vision-exported spreadsheets. We provide solutions from the user's perspective to the best of our knowledge.

106 changes: 99 additions & 7 deletions src/power_grid_model_io/converters/tabular_converter.py
@@ -337,11 +337,14 @@
        if extra_info is None:
            return

        # Normalize col_def to handle deduplication when optional_extra contains columns also in regular extra
        normalized_col_def = self._normalize_extra_col_def(col_def)

        extra = self._parse_col_def(
            data=data,
            table=table,
            table_mask=table_mask,
            col_def=col_def,
            col_def=normalized_col_def,
            extra_info=None,
        ).to_dict(orient="records")
        for i, xtr in zip(uuids, extra):
@@ -356,6 +359,59 @@
            else:
                extra_info[i] = xtr

    def _normalize_extra_col_def(self, col_def: Any) -> Any:

Check failure on line 362 in src/power_grid_model_io/converters/tabular_converter.py (SonarCloud Code Analysis): Refactor this function to reduce its Cognitive Complexity from 37 to the 15 allowed. See more on https://sonarcloud.io/project/issues?id=PowerGridModel_power-grid-model-io&issues=AZrbnA0NcJQMtlrCbyAi&open=AZrbnA0NcJQMtlrCbyAi&pullRequest=344
"""
Normalize extra column definition to eliminate duplicates between regular columns and optional_extra.
Regular columns take precedence over optional_extra columns.
Additionally, ensure no duplicates within optional_extra.
Args:
col_def: Column definition for extra info that may contain optional_extra sections
Returns:
Normalized column definition with duplicates removed from optional_extra
"""
if not isinstance(col_def, list):
return col_def

# Collect all non-optional_extra column names
regular_columns = set()
normalized_list = []

for item in col_def:
if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item:
# This is an optional_extra section - we'll process it later
normalized_list.append(item)
else:
# This is a regular column
if isinstance(item, str):
regular_columns.add(item)
normalized_list.append(item)

# Now process optional_extra sections and remove duplicates
final_list = []
for item in normalized_list:
if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item:
optional_cols = item["optional_extra"]
if isinstance(optional_cols, list):
# Filter out columns that are already in regular columns
filtered_optional_cols = []
for col in optional_cols:
if isinstance(col, str) and col in regular_columns:
continue
if col not in filtered_optional_cols:
filtered_optional_cols.append(col)
# Only include the optional_extra section if it has remaining columns
if filtered_optional_cols:
final_list.append({"optional_extra": filtered_optional_cols})
else:
# Keep non-list optional_extra as-is (shouldn't happen but be safe)
final_list.append(item)
else:
final_list.append(item)

return final_list
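
As an aside on the SonarCloud finding above, and not part of this diff: one possible way to bring the cognitive complexity down while keeping the same behaviour for list inputs is sketched below (the `_dedup` helper is made up, and `Any` from `typing` is assumed to be in scope as in the module):

```python
def _normalize_extra_col_def(self, col_def: Any) -> Any:
    """Sketch: same deduplication as above, with fewer nested branches."""
    if not isinstance(col_def, list):
        return col_def

    regular = {item for item in col_def if isinstance(item, str)}

    def _dedup(cols: list) -> list:
        kept: list = []
        for col in cols:
            if (isinstance(col, str) and col in regular) or col in kept:
                continue
            kept.append(col)
        return kept

    result = []
    for item in col_def:
        is_optional_section = (
            isinstance(item, dict) and set(item) == {"optional_extra"} and isinstance(item["optional_extra"], list)
        )
        if is_optional_section:
            filtered = _dedup(item["optional_extra"])
            if filtered:
                result.append({"optional_extra": filtered})
        else:
            result.append(item)
    return result
```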

    @staticmethod
    def _merge_pgm_data(data: Dict[str, List[np.ndarray]]) -> Dict[str, np.ndarray]:
        """During the conversion, multiple numpy arrays can be produced for the same type of component. These arrays
@@ -396,6 +452,8 @@
        col_def: Any,
        table_mask: Optional[np.ndarray],
        extra_info: Optional[ExtraInfo],
        *,
        allow_missing: bool = False,
    ) -> pd.DataFrame:
        """Interpret the column definition and extract/convert/create the data as a pandas DataFrame.
@@ -404,15 +462,27 @@
            table: str:
            col_def: Any:
            extra_info: Optional[ExtraInfo]:
            allow_missing: bool: If True, missing columns will return empty DataFrame instead of raising KeyError

        Returns:
        """
        if isinstance(col_def, (int, float)):
            return self._parse_col_def_const(data=data, table=table, col_def=col_def, table_mask=table_mask)
        if isinstance(col_def, str):
            return self._parse_col_def_column_name(data=data, table=table, col_def=col_def, table_mask=table_mask)
            return self._parse_col_def_column_name(
                data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing
            )
        if isinstance(col_def, dict):
            # Check if this is an optional_extra wrapper
            if len(col_def) == 1 and "optional_extra" in col_def:
                # Extract the list of optional columns and parse as composite with allow_missing=True
                optional_cols = col_def["optional_extra"]
                if not isinstance(optional_cols, list):
                    raise TypeError(f"optional_extra value must be a list, got {type(optional_cols).__name__}")
                return self._parse_col_def_composite(
                    data=data, table=table, col_def=optional_cols, table_mask=table_mask, allow_missing=True
                )
Comment on lines +483 to +485 (Member):

This is the only place where I see allow_missing=True. Why is that? Is it because it's only relevant when the underlying structure is a dict, hence that is only when the "new" extra optional parameters matter? Why isn't it relevant below when we have a list instead?

I don't think I've ever worked in this repo, so genuinely asking. I'd appreciate tech review over this :)

Member Author:

For now, optional_extra fields are the only ones whose existence we treat as optional.

        return self._parse_col_def_filter(
            data=data,
            table=table,
@@ -421,7 +491,9 @@
            extra_info=extra_info,
        )
        if isinstance(col_def, list):
            return self._parse_col_def_composite(data=data, table=table, col_def=col_def, table_mask=table_mask)
            return self._parse_col_def_composite(
                data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing
            )
        raise TypeError(f"Invalid column definition: {col_def}")

    @staticmethod
@@ -454,6 +526,7 @@
        table: str,
        col_def: str,
        table_mask: Optional[np.ndarray] = None,
        allow_missing: bool = False,
    ) -> pd.DataFrame:
        """Extract a column from the data. If the column doesn't exist, check if the col_def is a special float value,
        like 'inf'. If that's the case, create a single column pandas DataFrame containing the const value.
@@ -462,6 +535,7 @@
            data: TabularData:
            table: str:
            col_def: str:
            allow_missing: bool: If True, return empty DataFrame when column is missing instead of raising KeyError

        Returns:
@@ -484,10 +558,17 @@

        try:  # Maybe it is not a column name, but a float value like 'inf', let's try to convert the string to a float
            const_value = float(col_def)
        except ValueError:
            # pylint: disable=raise-missing-from
        except ValueError as e:
            if allow_missing:
                # Return empty DataFrame with correct number of rows when column is optional and missing
                self._log.debug(
                    "Optional column not found",
                    table=table,
                    columns=" or ".join(f"'{col_name}'" for col_name in columns),
                )
                return pd.DataFrame(index=table_data.index)
            columns_str = " and ".join(f"'{col_name}'" for col_name in columns)
            raise KeyError(f"Could not find column {columns_str} on table '{table}'")
            raise KeyError(f"Could not find column {columns_str} on table '{table}'") from e
Member (@mgovers, Dec 3, 2025):

I did some thinking on the original code here. I now understand why it did not have raise from e here:

- either it is a string that can be converted to a float (e.g. inf; see the description in the try: statement), which should be interpreted as a float
- or it is a column name which is absent, which
  - either is allowed (new code)
  - or is disallowed and should raise a KeyError

However, I dislike that original design choice. Instead, I am of the opinion that it should be something like

```python
if const_value := float_or_none(col_def):
    return self._parse_col_def_const(..., const_value=const_value)

# now, we know for sure that it is a column name that is absent
if allow_missing:
    return ...

raise KeyError(...)
```

where float_or_none can indeed be implemented as

```python
def float_or_none(value: str) -> float | None:
    try:
        return float(value)
    except ValueError:
        return None
```

The reason I do not want to raise in the except block is that the conversion to float is actually already the fall-back. In pseudo-code, the logic is actually something like

```
if the column header is a regular column name:
    return column header as regular column name
elif the column header is a floating point value:
    return the column header as a floating point value
elif this specific column is optional:
    return None
else:
    raise a KeyError because it was mandatory but it is not found
```

This also indicates why the raise ... from e was disabled: the KeyError is not the fall-back but the default; the conversion to float is actually the fall-back logic. But, again, there's a reason why raise ... from e is good practice: it shows the intention. In the original code, the intention was not clear.
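
Putting those pieces together, the tail of `_parse_col_def_column_name` might then read roughly as follows. This is only a sketch on top of the suggestion above; note the `is not None` check, so that a literal "0" is still treated as a constant.

```python
# Sketch only: assumes `columns`, `table_data`, `allow_missing` and `float_or_none`
# are in scope, as in the diff and the suggestion above.
const_value = float_or_none(col_def)
if const_value is not None:  # not a truthiness check, so 0.0 still counts as a constant
    return self._parse_col_def_const(data=data, table=table, col_def=const_value, table_mask=table_mask)

# At this point col_def is a column name that is absent from the table.
if allow_missing:
    self._log.debug(
        "Optional column not found",
        table=table,
        columns=" or ".join(f"'{col_name}'" for col_name in columns),
    )
    return pd.DataFrame(index=table_data.index)

columns_str = " and ".join(f"'{col_name}'" for col_name in columns)
raise KeyError(f"Could not find column {columns_str} on table '{table}'")
```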

Member Author:

Your suggestion?

Member (@mgovers, Dec 3, 2025):

I know. raise ... from e is good practice always. The only reason why it's not here is because the actual design is bad, not because the raise ... from e is not supposed to be used here.

Member:

I'll add this to the knowledge sharing session of tomorrow.


        return self._parse_col_def_const(data=data, table=table, col_def=const_value, table_mask=table_mask)

@@ -778,13 +859,15 @@
        table: str,
        col_def: list,
        table_mask: Optional[np.ndarray],
        allow_missing: bool = False,
    ) -> pd.DataFrame:
        """Select multiple columns (each is created from a column definition) and return them as a new DataFrame.

        Args:
            data: TabularData:
            table: str:
            col_def: list:
            allow_missing: bool: If True, skip missing columns instead of raising errors

        Returns:
@@ -797,10 +880,19 @@
                col_def=sub_def,
                table_mask=table_mask,
                extra_info=None,
                allow_missing=allow_missing,
            )
            for sub_def in col_def
        ]
        return pd.concat(columns, axis=1)
        # Filter out DataFrames with no columns (from missing optional columns)
        non_empty_columns = [col for col in columns if len(col.columns) > 0]
        if not non_empty_columns:
            # If all columns are missing, return an empty DataFrame with the correct number of rows
            table_data = data[table]
            if table_mask is not None:
                table_data = table_data[table_mask]
            return pd.DataFrame(index=table_data.index)
        return pd.concat(non_empty_columns, axis=1)

    def _get_id(self, table: str, key: Mapping[str, int], name: Optional[str]) -> int:
        """
24 changes: 24 additions & 0 deletions tests/data/config/test_optional_extra_mapping.yaml
@@ -0,0 +1,24 @@
# SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>
#
# SPDX-License-Identifier: MPL-2.0
---
# Test mapping file for optional_extra feature
grid:
  nodes:
    node:
      id:
        auto_id:
          key: node_id
      u_rated: voltage
      extra:
        - ID
        - Name
        - optional_extra:
          - GUID
          - StationID

units:
  V:
    kV: 1000.0

substitutions: {}
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/vision/vision_optional_extra_full.xlsx.license
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>

SPDX-License-Identifier: MPL-2.0
30 changes: 30 additions & 0 deletions tests/data/vision/vision_optional_extra_mapping.yaml
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>
#
# SPDX-License-Identifier: MPL-2.0
---
# Test mapping file for optional_extra feature with Vision Excel format
id_reference:
  nodes_table: Nodes
  number: Number
  node_number: Node.Number
  sub_number: Subnumber

grid:
  Nodes:
    node:
      id:
        auto_id:
          key: Number
      u_rated: Unom
      extra:
        - ID
        - Name
        - optional_extra:
          - GUID
          - StationID

units:
  V:
    kV: 1000.0

substitutions: {}
3 changes: 3 additions & 0 deletions tests/data/vision/vision_optional_extra_mapping.yaml.license
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>

SPDX-License-Identifier: MPL-2.0
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/vision/vision_optional_extra_minimal.xlsx.license
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>

SPDX-License-Identifier: MPL-2.0
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/vision/vision_optional_extra_partial.xlsx.license
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]>

SPDX-License-Identifier: MPL-2.0