-
Notifications
You must be signed in to change notification settings - Fork 12
Logic to handle optional_extra in Vision Excel converter input #344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
60011ca
7f00482
5d78037
e375135
d357c8a
ef1ce32
826b266
a9bf00e
c9b3816
1e4b625
2f84ac9
64c2132
8323616
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -337,11 +337,14 @@ | |
| if extra_info is None: | ||
| return | ||
|
|
||
| # Normalize col_def to handle deduplication when optional_extra contains columns also in regular extra | ||
| normalized_col_def = self._normalize_extra_col_def(col_def) | ||
|
|
||
| extra = self._parse_col_def( | ||
| data=data, | ||
| table=table, | ||
| table_mask=table_mask, | ||
| col_def=col_def, | ||
| col_def=normalized_col_def, | ||
| extra_info=None, | ||
| ).to_dict(orient="records") | ||
| for i, xtr in zip(uuids, extra): | ||
|
|
@@ -356,6 +359,59 @@ | |
| else: | ||
| extra_info[i] = xtr | ||
|
|
||
| def _normalize_extra_col_def(self, col_def: Any) -> Any: | ||
|
Check failure on line 362 in src/power_grid_model_io/converters/tabular_converter.py
|
||
| """ | ||
| Normalize extra column definition to eliminate duplicates between regular columns and optional_extra. | ||
| Regular columns take precedence over optional_extra columns. | ||
| Additionally, ensure no duplicates within optional_extra. | ||
| Args: | ||
| col_def: Column definition for extra info that may contain optional_extra sections | ||
| Returns: | ||
| Normalized column definition with duplicates removed from optional_extra | ||
| """ | ||
| if not isinstance(col_def, list): | ||
| return col_def | ||
|
|
||
| # Collect all non-optional_extra column names | ||
| regular_columns = set() | ||
| normalized_list = [] | ||
|
|
||
| for item in col_def: | ||
| if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: | ||
| # This is an optional_extra section - we'll process it later | ||
| normalized_list.append(item) | ||
| else: | ||
| # This is a regular column | ||
| if isinstance(item, str): | ||
| regular_columns.add(item) | ||
| normalized_list.append(item) | ||
|
|
||
| # Now process optional_extra sections and remove duplicates | ||
| final_list = [] | ||
| for item in normalized_list: | ||
| if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: | ||
| optional_cols = item["optional_extra"] | ||
| if isinstance(optional_cols, list): | ||
| # Filter out columns that are already in regular columns | ||
| filtered_optional_cols = [] | ||
| for col in optional_cols: | ||
| if isinstance(col, str) and col in regular_columns: | ||
| continue | ||
| if col not in filtered_optional_cols: | ||
| filtered_optional_cols.append(col) | ||
| # Only include the optional_extra section if it has remaining columns | ||
| if filtered_optional_cols: | ||
| final_list.append({"optional_extra": filtered_optional_cols}) | ||
| else: | ||
| # Keep non-list optional_extra as-is (shouldn't happen but be safe) | ||
| final_list.append(item) | ||
| else: | ||
| final_list.append(item) | ||
|
|
||
| return final_list | ||
|
|
||
| @staticmethod | ||
| def _merge_pgm_data(data: Dict[str, List[np.ndarray]]) -> Dict[str, np.ndarray]: | ||
| """During the conversion, multiple numpy arrays can be produced for the same type of component. These arrays | ||
|
|
@@ -396,6 +452,8 @@ | |
| col_def: Any, | ||
| table_mask: Optional[np.ndarray], | ||
| extra_info: Optional[ExtraInfo], | ||
| *, | ||
| allow_missing: bool = False, | ||
mgovers marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) -> pd.DataFrame: | ||
| """Interpret the column definition and extract/convert/create the data as a pandas DataFrame. | ||
|
|
@@ -404,15 +462,27 @@ | |
| table: str: | ||
| col_def: Any: | ||
| extra_info: Optional[ExtraInfo]: | ||
| allow_missing: bool: If True, missing columns will return empty DataFrame instead of raising KeyError | ||
| Returns: | ||
| """ | ||
| if isinstance(col_def, (int, float)): | ||
| return self._parse_col_def_const(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||
| if isinstance(col_def, str): | ||
| return self._parse_col_def_column_name(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||
| return self._parse_col_def_column_name( | ||
| data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing | ||
| ) | ||
| if isinstance(col_def, dict): | ||
| # Check if this is an optional_extra wrapper | ||
| if len(col_def) == 1 and "optional_extra" in col_def: | ||
| # Extract the list of optional columns and parse as composite with allow_missing=True | ||
| optional_cols = col_def["optional_extra"] | ||
| if not isinstance(optional_cols, list): | ||
| raise TypeError(f"optional_extra value must be a list, got {type(optional_cols).__name__}") | ||
| return self._parse_col_def_composite( | ||
| data=data, table=table, col_def=optional_cols, table_mask=table_mask, allow_missing=True | ||
| ) | ||
|
Comment on lines
+483
to
+485
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the only place where I see I don't think I've ever worked in this repo, so genuinely asking. I'd appreciate tech review over this :)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now only |
||
| return self._parse_col_def_filter( | ||
| data=data, | ||
| table=table, | ||
|
|
@@ -421,7 +491,9 @@ | |
| extra_info=extra_info, | ||
| ) | ||
| if isinstance(col_def, list): | ||
| return self._parse_col_def_composite(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||
| return self._parse_col_def_composite( | ||
| data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing | ||
| ) | ||
| raise TypeError(f"Invalid column definition: {col_def}") | ||
|
|
||
| @staticmethod | ||
|
|
@@ -454,6 +526,7 @@ | |
| table: str, | ||
| col_def: str, | ||
| table_mask: Optional[np.ndarray] = None, | ||
| allow_missing: bool = False, | ||
| ) -> pd.DataFrame: | ||
| """Extract a column from the data. If the column doesn't exist, check if the col_def is a special float value, | ||
| like 'inf'. If that's the case, create a single column pandas DataFrame containing the const value. | ||
|
|
@@ -462,6 +535,7 @@ | |
| data: TabularData: | ||
| table: str: | ||
| col_def: str: | ||
| allow_missing: bool: If True, return empty DataFrame when column is missing instead of raising KeyError | ||
| Returns: | ||
|
|
@@ -484,10 +558,17 @@ | |
|
|
||
| try: # Maybe it is not a column name, but a float value like 'inf', let's try to convert the string to a float | ||
| const_value = float(col_def) | ||
| except ValueError: | ||
| # pylint: disable=raise-missing-from | ||
| except ValueError as e: | ||
| if allow_missing: | ||
| # Return empty DataFrame with correct number of rows when column is optional and missing | ||
| self._log.debug( | ||
nitbharambe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| "Optional column not found", | ||
| table=table, | ||
| columns=" or ".join(f"'{col_name}'" for col_name in columns), | ||
| ) | ||
| return pd.DataFrame(index=table_data.index) | ||
| columns_str = " and ".join(f"'{col_name}'" for col_name in columns) | ||
| raise KeyError(f"Could not find column {columns_str} on table '{table}'") | ||
| raise KeyError(f"Could not find column {columns_str} on table '{table}'") from e | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did some thinking on the original code here. It now understand why it did not have
However, I dislike that original design choice. Instead, I am of the opinion that it should be something like if const_value := float_or_none(col_def):
return self._parse_col_def_const(..., const_value=const_value)
# now, we know for sure that it is a column name that is absent
if allow_missing:
return ...
raise KeyError(...)where def float_or_none(value: str) -> float | None:
try:
return float(value)
except ValueError:
return NoneThe reason I do not want to raise in the except block is that the conversion to if the column header is a regular column name:
return column header as regular column name
elif the column header is a floating point value:
return the column header as a floating point value
elif this specific column is optional
return None
else:
raise a KeyError because it was mandatory but it is not foundthis also is an indication as to why the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Your suggestion?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i know.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'll add this to the knowledge sharing session of tomorrow. |
||
|
|
||
| return self._parse_col_def_const(data=data, table=table, col_def=const_value, table_mask=table_mask) | ||
|
|
||
|
|
@@ -778,13 +859,15 @@ | |
| table: str, | ||
| col_def: list, | ||
| table_mask: Optional[np.ndarray], | ||
| allow_missing: bool = False, | ||
| ) -> pd.DataFrame: | ||
| """Select multiple columns (each is created from a column definition) and return them as a new DataFrame. | ||
| Args: | ||
| data: TabularData: | ||
| table: str: | ||
| col_def: list: | ||
| allow_missing: bool: If True, skip missing columns instead of raising errors | ||
| Returns: | ||
|
|
@@ -797,10 +880,19 @@ | |
| col_def=sub_def, | ||
| table_mask=table_mask, | ||
| extra_info=None, | ||
| allow_missing=allow_missing, | ||
| ) | ||
| for sub_def in col_def | ||
| ] | ||
| return pd.concat(columns, axis=1) | ||
| # Filter out DataFrames with no columns (from missing optional columns) | ||
| non_empty_columns = [col for col in columns if len(col.columns) > 0] | ||
| if not non_empty_columns: | ||
| # If all columns are missing, return an empty DataFrame with the correct number of rows | ||
| table_data = data[table] | ||
| if table_mask is not None: | ||
| table_data = table_data[table_mask] | ||
| return pd.DataFrame(index=table_data.index) | ||
| return pd.concat(non_empty_columns, axis=1) | ||
|
|
||
| def _get_id(self, table: str, key: Mapping[str, int], name: Optional[str]) -> int: | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
| # | ||
| # SPDX-License-Identifier: MPL-2.0 | ||
| --- | ||
| # Test mapping file for optional_extra feature | ||
| grid: | ||
| nodes: | ||
| node: | ||
| id: | ||
| auto_id: | ||
| key: node_id | ||
| u_rated: voltage | ||
| extra: | ||
| - ID | ||
| - Name | ||
| - optional_extra: | ||
| - GUID | ||
| - StationID | ||
|
|
||
| units: | ||
| V: | ||
| kV: 1000.0 | ||
|
|
||
| substitutions: {} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
| # | ||
| # SPDX-License-Identifier: MPL-2.0 | ||
| --- | ||
| # Test mapping file for optional_extra feature with Vision Excel format | ||
| id_reference: | ||
| nodes_table: Nodes | ||
| number: Number | ||
| node_number: Node.Number | ||
| sub_number: Subnumber | ||
|
|
||
| grid: | ||
| Nodes: | ||
| node: | ||
| id: | ||
| auto_id: | ||
| key: Number | ||
| u_rated: Unom | ||
| extra: | ||
| - ID | ||
| - Name | ||
| - optional_extra: | ||
| - GUID | ||
| - StationID | ||
|
|
||
| units: | ||
| V: | ||
| kV: 1000.0 | ||
|
|
||
| substitutions: {} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <[email protected]> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what happens if an element is specified in both the required and optional?
I believe that the default should be that required precedes optional, so maybe we need to add an explicit test case for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
2f84ac9
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what was the behaviour previously when specifying
? Did it append a separate column or did it combine the two?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only one would appear. In this case with the second
IDit won't do anything since it already exists.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in that case, it may be possible to achieve the same result (namely that the required column is always leading) with less new code. Note that that does mean that the
optional_extraneeds to be parsed after the regularextrathings to ensure this works:in addition, the following two should also be equivalent: