From 957a6e594c681c794547279eb19f9b41ca3943a1 Mon Sep 17 00:00:00 2001
From: Dhiren <doswal1982@gmail.com>
Date: Wed, 22 Apr 2026 17:08:00 -0700
Subject: [PATCH 1/5] refactoring input params for search_studies from kwargs
 to respective API names for IDE auto complete

---
 pyleotups/core/NOAADataset.py    | 40 +++++++++++++++++++++++++++++++-
 pyleotups/core/PangaeaDataset.py | 28 +++++++++++++++-------
 2 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py
index 6343276e..c84c2b69 100644
--- a/pyleotups/core/NOAADataset.py
+++ b/pyleotups/core/NOAADataset.py
@@ -127,7 +127,41 @@ def __iadd__(self, other):
         return self
 
     
-    def search_studies(self, **kwargs):
+    def search_studies(
+        self,
+        xml_id: int | str | None = None,
+        noaa_id: int | str | None = None,
+        search_text: str | None = None,
+        investigators: str | list[str] | None = None,
+        investigators_and_or: str = "or",
+        locations: str | list[str] | None = None,
+        locations_and_or: str = "or",
+        keywords: str | list[str] | None = None,
+        keywords_and_or: str = "or",
+        species: str | list[str] | None = None,
+        species_and_or: str = "or",
+        variable_name: str | list[str] | None = None,
+        variable_name_and_or: str = "or",
+        cv_materials: str | list[str] | None = None,
+        cv_materials_and_or: str = "or",
+        cv_seasonalities: str | list[str] | None = None,
+        cv_seasonalities_and_or: str = "or",
+        min_lat: int | None = None,
+        max_lat: int | None = None,
+        min_lon: int | None = None,
+        max_lon: int | None = None,
+        min_elevation: int | None = None,
+        max_elevation: int | None = None,
+        earliest_year: int | None = None,
+        latest_year: int | None = None,
+        time_format: str | None = None,
+        time_method: str | None = None,
+        reconstruction: bool | None = None,
+        recent: bool = False,
+        limit: int = 100,
+        skip: int | None = None,
+        data_publisher: str = "NOAA",
+    ):
         r"""
         Search for NOAA studies using the specified parameters.
 
@@ -405,6 +439,10 @@ def search_studies(self, **kwargs):
             df_skip.head()
         """
 
+        kwargs = locals().copy()
+
+        kwargs.pop("self")
+
         if "headers_only" in kwargs:
             log.warning("Keyword Argument Pair : 'headers_only' is not supported and will be ignored while making requests.")
             kwargs.pop("headers_only", None)
diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py
index 767f987e..55f7c33b 100644
--- a/pyleotups/core/PangaeaDataset.py
+++ b/pyleotups/core/PangaeaDataset.py
@@ -139,14 +139,23 @@ def _resolve_and_register_ids(self, study_ids):
     # -------------------------
     # search_studies: q, bbox, keywords -> registers studies and returns same style as Dataset.search_studies (DataFrame)
     # -------------------------
-    def search_studies(self,
-                #    q: Optional[str] = None,
-                #    study_ids: Optional[Union[int, str, List]] = None,
-                #    bbox: Optional[Tuple[float, float, float, float]] = None,
-                #    limit: int = 10,
-                #    offset: int = 0,
-                #    display: bool = False
-                **kwargs) -> Optional[pd.DataFrame]:
+    def search_studies(
+            self,
+            study_ids: int | str | list[int | str] | None = None,
+            topic: str | list[str] | None = None,
+            topic_and_or: str = "or",
+            search_text: str | None = None,
+            investigators: str | list[str] | None = None,
+            investigators_and_or: str = "and",
+            variable_name: str | list[str] | None = None,
+            variable_name_and_or: str = "and",
+            min_lat: float | None = None,
+            max_lat: float | None = None,
+            min_lon: float | None = None,
+            max_lon: float | None = None,
+            limit: int = 100,
+            skip: int = 0,
+            ) -> Optional[pd.DataFrame]:
         """
         Search PANGAEA and register results in self.studies.
 
@@ -328,6 +337,9 @@ def search_studies(self,
             )
             df.head()
         """
+
+        kwargs = locals().copy()
+        kwargs.pop("self")
         study_ids = kwargs.get("study_ids")
 
         # -------------------------------------------

From 6957e3e8003ab4833db1ec5cdbe8f8f2db3a04b7 Mon Sep 17 00:00:00 2001
From: Dhiren <doswal1982@gmail.com>
Date: Mon, 4 May 2026 12:57:28 -0700
Subject: [PATCH 2/5] Add object addition to PangaeaDataset; clear its
 studies when a search is redone

---
 pyleotups/core/PangaeaDataset.py | 48 ++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py
index 55f7c33b..dc741917 100644
--- a/pyleotups/core/PangaeaDataset.py
+++ b/pyleotups/core/PangaeaDataset.py
@@ -62,6 +62,53 @@ def __init__(self, cache_dir: Optional[str] = None, auth_token: Optional[str] =
         # keys: StudyID (DOI/URI) -> {'panobj': PanDataSet or None, 'summary': normalized_dict}
         self.studies: Dict[str, PangaeaStudy] = {}
 
+    def __add__(self, other):
+        """Return a new PangaeaDataset holding the union of both operands' studies.
+
+        Entries are keyed by StudyID; on a collision the left operand's entry is
+        kept and a warning is logged when the two summaries are not equal.
+        """
+        if not isinstance(other, PangaeaDataset):
+            return NotImplemented
+
+        union = PangaeaDataset(cache_dir=self.cache_dir, auth_token=self.auth_token)
+        union.studies = dict(self.studies)  # shallow copy; study objects are shared
+
+        for sid, incoming in other.studies.items():
+            if sid not in union.studies:
+                union.studies[sid] = incoming
+                continue
+            try:
+                identical = union.studies[sid].to_summary_dict() == incoming.to_summary_dict()
+            except Exception:
+                identical = False  # a failed comparison is reported as a mismatch
+            if not identical:
+                logger.warning(
+                    f"PangaeaDataset union: duplicate StudyID {sid} with differing content. "
+                    "Keeping left-hand version."
+                )
+        return union
+
+    def __iadd__(self, other):
+        """Merge ``other``'s studies into this dataset in place; existing IDs win."""
+        if not isinstance(other, PangaeaDataset):
+            return NotImplemented
+
+        for sid, incoming in other.studies.items():
+            if sid not in self.studies:
+                self.studies[sid] = incoming
+                continue
+            try:
+                identical = self.studies[sid].to_summary_dict() == incoming.to_summary_dict()
+            except Exception:
+                identical = False  # a failed comparison is reported as a mismatch
+            if not identical:
+                logger.warning(
+                    f"PangaeaDataset in-place union: duplicate StudyID {sid} with differing content. "
+                    "Keeping existing version."
+                )
+        return self
+
     @staticmethod
     def _normalize_id(study_id: str) -> int:
         """
@@ -340,6 +387,7 @@ def search_studies(
 
         kwargs = locals().copy()
         kwargs.pop("self")
+        self.studies.clear()
         study_ids = kwargs.get("study_ids")
 
         # -------------------------------------------

From dbd33f0c01a0c5b0b69964abf3b45a3bc620e37c Mon Sep 17 00:00:00 2001
From: Dhiren <doswal1982@gmail.com>
Date: Mon, 4 May 2026 13:19:58 -0700
Subject: [PATCH 3/5] Fixing the documentation for skip parameters

---
 pyleotups/core/NOAADataset.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py
index c84c2b69..056b6f8b 100644
--- a/pyleotups/core/NOAADataset.py
+++ b/pyleotups/core/NOAADataset.py
@@ -264,9 +264,6 @@ def search_studies(
         
         limit : int, default 100
             Number of studies to return (PyleoTUPS default).
-        
-        skip : int, 
-            Number of studies to skip (for paging). Paired with `limit`.
 
         skip : int, optional
             Number of studies to skip (for pagination). Use with ``limit`` to page through results.
@@ -301,7 +298,7 @@ def search_studies(
         Time window defaults. If either ``earliest_year`` or ``latest_year`` is provided and neither ``time_format``
         nor ``time_method`` is supplied, ``time_format`` defaults to ``'CE'`` (a note is recorded).
 
-        Unsupported parameters. ``headersOnly`` and ``skip`` are not supported by PyleoTUPS and are ignored if passed.
+        Unsupported parameters. ``headersOnly`` is not supported by PyleoTUPS and is ignored if passed.
 
         Boolean normalization. Parameters expected as ``'Y'/'N'`` accept: True/False, or strings like
         ``"true"|"yes"|"y"|"1"`` → ``'Y'`` and ``"false"|"no"|"n"|"0"`` → ``'N'``.

From 94a9d4f1de25637fae9a20d171c52d1da37bec76 Mon Sep 17 00:00:00 2001
From: Dhiren <doswal1982@gmail.com>
Date: Mon, 4 May 2026 15:10:40 -0700
Subject: [PATCH 4/5] Aligning parameters for NOAADataset.search_studies with
 documentation

---
 pyleotups/core/NOAADataset.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py
index 056b6f8b..d80e3f90 100644
--- a/pyleotups/core/NOAADataset.py
+++ b/pyleotups/core/NOAADataset.py
@@ -132,6 +132,7 @@ def search_studies(
         xml_id: int | str | None = None,
         noaa_id: int | str | None = None,
         search_text: str | None = None,
+        data_type_id: str | None = None,
         investigators: str | list[str] | None = None,
         investigators_and_or: str = "or",
         locations: str | list[str] | None = None,
@@ -181,7 +182,7 @@ def search_studies(
             General text search across study content. Supports wildcards (%) and logical operators (AND, OR).
             Examples: 'younger dryas', 'loess AND stratigraphy'
 
-        data_publisher : by default 'NOAA'
+        data_publisher : str, default "NOAA"
             Choose from: 'NOAA', 'NEOTOMA', or 'PANGAEA'.
             Example: 'NOAA'
 
@@ -356,7 +357,7 @@ def search_studies(
         .. jupyter-execute::
 
             ### Multiple investigators (AND by default)
-            df_multinv_and = ds.search_studies(investigators=["Wahl, E.R.", "Vose, R.S."], investigatorsAndOr = "and")
+            df_multinv_and = ds.search_studies(investigators=["Wahl, E.R.", "Vose, R.S."], investigators_and_or="and")
             df_multinv_and.head()
 
         .. jupyter-execute::

From ce090aa705838c1c3ad147a727d902001bc885ed Mon Sep 17 00:00:00 2001
From: Dhiren <doswal1982@gmail.com>
Date: Tue, 5 May 2026 09:30:25 -0700
Subject: [PATCH 5/5] Adding coverage to Pangaea.get_summary() and min max Lat
 Lon to Pangaea.get_geo()

---
 pyleotups/core/PangaeaDataset.py |  3 +-
 pyleotups/utils/PangaeaStudy.py  | 60 ++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py
index dc741917..5bde6834 100644
--- a/pyleotups/core/PangaeaDataset.py
+++ b/pyleotups/core/PangaeaDataset.py
@@ -474,7 +474,8 @@ def get_summary(self) -> pd.DataFrame:
         pandas.DataFrame
             Return a DataFrame summarizing all loaded/registered PANGAEA datasets.
             ["StudyID","StudyName","EarliestYearBP","MostRecentYearBP",
-            "EarliestYearCE","MostRecentYearCE","StudyNotes","ScienceKeywords","Investigators",
+            "EarliestYearCE","MostRecentYearCE","Coverage [S, N, W, E]",
+            "StudyNotes","ScienceKeywords","Investigators",
             "Publications","Sites","Funding"]
         """
         rows = []
diff --git a/pyleotups/utils/PangaeaStudy.py b/pyleotups/utils/PangaeaStudy.py
index b3cb4f01..186cfbfd 100644
--- a/pyleotups/utils/PangaeaStudy.py
+++ b/pyleotups/utils/PangaeaStudy.py
@@ -251,6 +251,48 @@ def _extract_temporal_extent(
 
         return earliest_bp, latest_bp, earliest_ce, latest_ce
 
+    def _compute_coverage(self) -> Optional[Tuple[float, float, float, float]]:
+        """
+        Derive one latitude/longitude bounding box for this study.
+
+        Every event in ``self._panobj.events`` contributes ``latitude`` and
+        ``longitude`` plus, when set, ``latitude2``/``longitude2``. A missing
+        second coordinate falls back to the first one, and coordinates that
+        are ``None`` are left out.
+
+        Returns
+        -------
+        tuple or None
+            (MinLatitude, MaxLatitude, MinLongitude, MaxLongitude),
+            or None when either axis has no usable value.
+        """
+        lat_values = []
+        lon_values = []
+
+        for event in self._panobj.events:
+            first_lat = event.latitude
+            first_lon = event.longitude
+
+            # Reuse the primary coordinate when no second one is recorded.
+            second_lat = getattr(event, "latitude2", None)
+            if second_lat is None:
+                second_lat = first_lat
+            second_lon = getattr(event, "longitude2", None)
+            if second_lon is None:
+                second_lon = first_lon
+
+            # Collect only the coordinates that are actually present.
+            lat_values.extend(v for v in (first_lat, second_lat) if v is not None)
+            lon_values.extend(v for v in (first_lon, second_lon) if v is not None)
+
+        # A box needs at least one latitude and one longitude.
+        if not (lat_values and lon_values):
+            return None
+
+        lat_lo, lat_hi = min(lat_values), max(lat_values)
+        lon_lo, lon_hi = min(lon_values), max(lon_values)
+        return (lat_lo, lat_hi, lon_lo, lon_hi)
+
     def to_summary_dict(self) -> Dict[str, Any]:
         """
         Convert study metadata to NOAA-style summary dictionary.
@@ -264,10 +306,8 @@ def to_summary_dict(self) -> Dict[str, Any]:
         self.earliest_bp, self.latest_bp, self.earliest_ce, self.latest_ce = (
             self._extract_temporal_extent()
         )
-        # if collection_founds :
-        #     logger.warning(
-        #     f'The Summary Table Below may contain Dataset marked as collection.'
-        #     f'Refer to the "CollectionMembers" column to identify collection datasets and their members.')
+        self.coverage = self._compute_coverage()
+
         return {
             "StudyID": self.study_id,
             "StudyName": ds.title,
@@ -275,6 +315,7 @@ def to_summary_dict(self) -> Dict[str, Any]:
             "MostRecentYearBP": self.latest_bp,
             "EarliestYearCE": self.earliest_ce,
             "MostRecentYearCE": self.latest_ce,
+            "Coverage [S, N, W, E]": self.coverage,
             "StudyNotes": ds.abstract,
             "ScienceKeywords": getattr(ds, "keywords", None),
             "Investigators": ", ".join(a.fullname for a in ds.authors),
@@ -306,14 +347,21 @@ def get_geo(self) -> pd.DataFrame:
         """
         rows = []
         for ev in self._panobj.events:
+            lat1 = ev.latitude
+            lon1 = ev.longitude
+            lat2 = ev.latitude2 if getattr(ev, "latitude2", None) is not None else lat1
+            lon2 = ev.longitude2 if getattr(ev, "longitude2", None) is not None else lon1
+
             rows.append(
                 {
                     "StudyID": self.study_id,
                     "SiteID": ev.id,
                     "SiteName": ev.label,
                     "LocationName": ev.location,
-                    "Latitude": ev.latitude,
-                    "Longitude": ev.longitude,
+                    "MinLatitude": min(v for v in [lat1, lat2] if v is not None) if lat1 is not None or lat2 is not None else None,
+                    "MaxLatitude": max(v for v in [lat1, lat2] if v is not None) if lat1 is not None or lat2 is not None else None,
+                    "MinLongitude": min(v for v in [lon1, lon2] if v is not None) if lon1 is not None or lon2 is not None else None,
+                    "MaxLongitude": max(v for v in [lon1, lon2] if v is not None) if lon1 is not None or lon2 is not None else None,
                     "Elevation": ev.elevation,
                 }
             )