LinkedEarth · khider · May 5, 2026 · Apr 23, 2026 · May 4, 2026 · May 4, 2026
diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py
@@ -127,7 +127,42 @@ def __iadd__(self, other):
         return self
 
 
-    def search_studies(self, **kwargs):
+    def search_studies(
+        self,
+        xml_id: int | str | None = None,
+        noaa_id: int | str | None = None,
+        search_text: str | None = None,
+        data_type_id: str | None = None,
+        investigators: str | list[str] | None = None,
+        investigators_and_or: str = "or",
+        locations: str | list[str] | None = None,
+        locations_and_or: str = "or",
+        keywords: str | list[str] | None = None,
+        keywords_and_or: str = "or",
+        species: str | list[str] | None = None,
+        species_and_or: str = "or",
+        variable_name: str | list[str] | None = None,
+        variable_name_and_or: str = "or",
+        cv_materials: str | list[str] | None = None,
+        cv_materials_and_or: str = "or",
+        cv_seasonalities: str | list[str] | None = None,
+        cv_seasonalities_and_or: str = "or",
+        min_lat: int | None = None,
+        max_lat: int | None = None,
+        min_lon: int | None = None,
+        max_lon: int | None = None,
+        min_elevation: int | None = None,
+        max_elevation: int | None = None,
+        earliest_year: int | None = None,
+        latest_year: int | None = None,
+        time_format: str | None = None,
+        time_method: str | None = None,
+        reconstruction: bool | None = None,
+        recent: bool = False,
+        limit: int = 100,
+        skip: int | None = None,
+        data_publisher: str = "NOAA",
+    ):
         r"""
         Search for NOAA studies using the specified parameters.
 
@@ -147,7 +182,7 @@ def search_studies(self, **kwargs):
             General text search across study content. Supports wildcards (%) and logical operators (AND, OR).
             Examples: 'younger dryas', 'loess AND stratigraphy'
 
-        data_publisher : by default 'NOAA'
+        data_publisher : str, default "NOAA"
             Choose from: 'NOAA', 'NEOTOMA', or 'PANGAEA'.
             Example: 'NOAA'
 
@@ -230,9 +265,6 @@ def search_studies(self, **kwargs):
 
         limit : int, default 100
             Number of studies to return (PyleoTUPS default).
-
-        skip : int, 
-            Number of studies to skip (for paging). Paired with `limit`.
 
         skip : int, optional
             Number of studies to skip (for pagination). Use with ``limit`` to page through results.
@@ -267,7 +299,7 @@ def search_studies(self, **kwargs):
         Time window defaults. If either ``earliest_year`` or ``latest_year`` is provided and neither ``time_format``
         nor ``time_method`` is supplied, ``time_format`` defaults to ``'CE'`` (a note is recorded).
 
-        Unsupported parameters. ``headersOnly`` and ``skip`` are not supported by PyleoTUPS and are ignored if passed.
+        Unsupported parameters. ``headersOnly`` is not supported by PyleoTUPS and ignored if passed.
 
         Boolean normalization. Parameters expected as ``'Y'/'N'`` accept: True/False, or strings like
         ``"true"|"yes"|"y"|"1"`` → ``'Y'`` and ``"false"|"no"|"n"|"0"`` → ``'N'``.
@@ -325,7 +357,7 @@ def search_studies(self, **kwargs):
         .. jupyter-execute::
 
             ### Multiple investigators (AND by default)
-            df_multinv_and = ds.search_studies(investigators=["Wahl, E.R.", "Vose, R.S."], investigatorsAndOr = "and")
+            df_multinv_and = ds.search_studies(investigators=["Wahl, E.R.", "Vose, R.S."], investigators_and_or="and")
             df_multinv_and.head()
 
         .. jupyter-execute::
@@ -405,6 +437,10 @@ def search_studies(self, **kwargs):
             df_skip.head()
         """
 
+        kwargs = locals().copy()
+
+        kwargs.pop("self")
+
         if "headers_only" in kwargs:
             log.warning("Keyword Argument Pair : 'headers_only' is not supported and will be ignored while making requests.")
             kwargs.pop("headers_only", None)

diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py
@@ -62,6 +62,53 @@ def __init__(self, cache_dir: Optional[str] = None, auth_token: Optional[str] =
         # keys: StudyID (DOI/URI) -> {'panobj': PanDataSet or None, 'summary': normalized_dict}
         self.studies: Dict[str, PangaeaStudy] = {}
 
+    def __add__(self, other):
+        """Return a new PangaeaDataset holding the union of both operands' studies."""
+        if not isinstance(other, PangaeaDataset):
+            return NotImplemented
+
+        # The result is built with the left operand's cache_dir and auth_token.
+        union = PangaeaDataset(cache_dir=self.cache_dir, auth_token=self.auth_token)
+        union.studies = dict(self.studies)  # shallow copy: study objects are shared
+
+        for sid, incoming in other.studies.items():
+            if sid not in union.studies:
+                union.studies[sid] = incoming
+                continue
+            # Duplicate StudyID: the left-hand entry wins; warn only when the
+            # two summaries differ or cannot be compared at all.
+            try:
+                identical = union.studies[sid].to_summary_dict() == incoming.to_summary_dict()
+            except Exception:
+                identical = False
+            if not identical:
+                logger.warning(
+                    f"PangaeaDataset union: duplicate StudyID {sid} with differing content. "
+                    "Keeping left-hand version."
+                )
+
+        return union
+
+    def __iadd__(self, other):
+        """Merge the studies of ``other`` into this dataset in place and return ``self``."""
+        if not isinstance(other, PangaeaDataset):
+            return NotImplemented
+        for sid, incoming in other.studies.items():
+            if sid not in self.studies:
+                self.studies[sid] = incoming
+                continue
+            # Duplicate StudyID: keep the entry already registered; warn on mismatch.
+            try:
+                identical = self.studies[sid].to_summary_dict() == incoming.to_summary_dict()
+            except Exception:
+                identical = False
+            if not identical:
+                logger.warning(
+                    f"PangaeaDataset in-place union: duplicate StudyID {sid} with differing content. "
+                    "Keeping existing version."
+                )
+        return self
+
     @staticmethod
     def _normalize_id(study_id: str) -> int:
         """
@@ -139,14 +186,23 @@ def _resolve_and_register_ids(self, study_ids):
     # -------------------------
     # search_studies: q, bbox, keywords -> registers studies and returns same style as Dataset.search_studies (DataFrame)
     # -------------------------
-    def search_studies(self,
-                #    q: Optional[str] = None,
-                #    study_ids: Optional[Union[int, str, List]] = None,
-                #    bbox: Optional[Tuple[float, float, float, float]] = None,
-                #    limit: int = 10,
-                #    offset: int = 0,
-                #    display: bool = False
-                **kwargs) -> Optional[pd.DataFrame]:
+    def search_studies(
+            self,
+            study_ids: int | str | list[int | str] | None = None,
+            topic: str | list[str] | None = None,
+            topic_and_or: str = "or",
+            search_text: str | None = None,
+            investigators: str | list[str] | None = None,
+            investigators_and_or: str = "and",
+            variable_name: str | list[str] | None = None,
+            variable_name_and_or: str = "and",
+            min_lat: float | None = None,
+            max_lat: float | None = None,
+            min_lon: float | None = None,
+            max_lon: float | None = None,
+            limit: int = 100,
+            skip: int = 0,
+            ) -> Optional[pd.DataFrame]:
         """
         Search PANGAEA and register results in self.studies.
 
@@ -328,6 +384,10 @@ def search_studies(self,
             )
             df.head()
         """
+
+        kwargs = locals().copy()
+        kwargs.pop("self")
+        self.studies.clear()
         study_ids = kwargs.get("study_ids")
 
         # -------------------------------------------
@@ -414,7 +474,8 @@ def get_summary(self) -> pd.DataFrame:
         pandas.DataFrame
             Return a DataFrame summarizing all loaded/registered PANGAEA datasets.
             ["StudyID","StudyName","EarliestYearBP","MostRecentYearBP",
-            "EarliestYearCE","MostRecentYearCE","StudyNotes","ScienceKeywords","Investigators",
+            "EarliestYearCE","MostRecentYearCE","Coverage [S, N, W, E]",
+            "StudyNotes","ScienceKeywords","Investigators",
             "Publications","Sites","Funding"]
         """
         rows = []

diff --git a/pyleotups/utils/PangaeaStudy.py b/pyleotups/utils/PangaeaStudy.py
@@ -251,6 +251,48 @@ def _extract_temporal_extent(
 
         return earliest_bp, latest_bp, earliest_ce, latest_ce
 
+    def _compute_coverage(self) -> Optional[Tuple[float, float, float, float]]:
+        """
+        Return the bounding box spanned by the coordinates of all study events.
+
+        Every event contributes its ``latitude``/``longitude`` and, when set,
+        its ``latitude2``/``longitude2``; a missing secondary coordinate falls
+        back to the primary one. ``None`` values are left out of the extremes.
+
+        Returns
+        -------
+        tuple or None
+            (MinLatitude, MaxLatitude, MinLongitude, MaxLongitude)
+            or None when no latitude or no longitude is available.
+        """
+        lats = []
+        lons = []
+
+        for event in self._panobj.events:
+            first_lat = event.latitude
+            second_lat = getattr(event, "latitude2", None)
+            first_lon = event.longitude
+            second_lon = getattr(event, "longitude2", None)
+
+            # No secondary coordinate: reuse the primary one so that it
+            # serves as both bounds for this event.
+            if second_lat is None:
+                second_lat = first_lat
+            if second_lon is None:
+                second_lon = first_lon
+
+            # Values that are still None carry no coordinate information.
+            lats.extend(v for v in (first_lat, second_lat) if v is not None)
+            lons.extend(v for v in (first_lon, second_lon) if v is not None)
+
+        # Both axes need at least one value to form a box.
+        if not (lats and lons):
+            return None
+
+        south, north = min(lats), max(lats)
+        west, east = min(lons), max(lons)
+        return (south, north, west, east)
+
     def to_summary_dict(self) -> Dict[str, Any]:
         """
         Convert study metadata to NOAA-style summary dictionary.
@@ -264,17 +306,16 @@ def to_summary_dict(self) -> Dict[str, Any]:
         self.earliest_bp, self.latest_bp, self.earliest_ce, self.latest_ce = (
             self._extract_temporal_extent()
         )
-        # if collection_founds :
-        #     logger.warning(
-        #     f'The Summary Table Below may contain Dataset marked as collection.'
-        #     f'Refer to the "CollectionMembers" column to identify collection datasets and their members.')
+        self.coverage = self._compute_coverage()
+
         return {
             "StudyID": self.study_id,
             "StudyName": ds.title,
             "EarliestYearBP": self.earliest_bp,
             "MostRecentYearBP": self.latest_bp,
             "EarliestYearCE": self.earliest_ce,
             "MostRecentYearCE": self.latest_ce,
+            "Coverage [S, N, W, E]": self.coverage,
             "StudyNotes": ds.abstract,
             "ScienceKeywords": getattr(ds, "keywords", None),
             "Investigators": ", ".join(a.fullname for a in ds.authors),
@@ -306,14 +347,21 @@ def get_geo(self) -> pd.DataFrame:
         """
         rows = []
         for ev in self._panobj.events:
+            lat1 = ev.latitude
+            lon1 = ev.longitude
+            lat2 = ev.latitude2 if getattr(ev, "latitude2", None) is not None else lat1
+            lon2 = ev.longitude2 if getattr(ev, "longitude2", None) is not None else lon1
+
             rows.append(
                 {
                     "StudyID": self.study_id,
                     "SiteID": ev.id,
                     "SiteName": ev.label,
                     "LocationName": ev.location,
-                    "Latitude": ev.latitude,
-                    "Longitude": ev.longitude,
+                    "MinLatitude": min(v for v in [lat1, lat2] if v is not None) if lat1 is not None or lat2 is not None else None,
+                    "MaxLatitude": max(v for v in [lat1, lat2] if v is not None) if lat1 is not None or lat2 is not None else None,
+                    "MinLongitude": min(v for v in [lon1, lon2] if v is not None) if lon1 is not None or lon2 is not None else None,
+                    "MaxLongitude": max(v for v in [lon1, lon2] if v is not None) if lon1 is not None or lon2 is not None else None,
                     "Elevation": ev.elevation,
                 }
             )