diff --git a/.gitignore b/.gitignore index 0bb9248..5c6586a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,12 @@ # Miscellaneous /setup2.py -/proj/ +/proj/ + +# Ignore outputs folder +output/* + +# Ignore PyCharm files +*.idea +app.log +temp_test.gpkg diff --git a/bin/firedpy.py b/bin/firedpy.py index 7842d89..4e41574 100644 --- a/bin/firedpy.py +++ b/bin/firedpy.py @@ -45,7 +45,11 @@ def str_to_bool(s: str): def test_earthdata_credentials(username: str, password: str) -> None: # Earthdata Login # test url for correct user/password - url = "https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.061/2019.01.01/BROWSE.MCD12Q1.A2019001.h10v09.061.2022169160720.1.jpg" + # base URL for the MCD12Q1 (landcover) product + base_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/MCD12Q1.061/' + # granule-specific URL for testing + granule_url = 'MCD12Q1.A2019001.h10v09.061.2022169160720/BROWSE.MCD12Q1.A2019001.h10v09.061.2022169160720.1.jpg' + url = base_url + granule_url password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm() password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password) @@ -204,13 +208,15 @@ def main(): end_year = None if end_year == 0 else end_year if land_cover_type != LandCoverType.NONE: - print('Retrieving landcover...') + print('\nRetrieving landcover data ...') land_cover = LandCover(out_dir, n_cores=n_cores, username=username, password=password) land_cover.get_land_cover(tiles, land_cover_type) eco_region_data = EcoRegion(out_dir) eco_region_data.get_eco_region() + print('\nRetrieving burn data ...') + test_earthdata_credentials(username, password) # test this again to be sure burn_data = BurnData(out_dir, username, password, n_cores) burn_data.get_burns(tiles, start_year, end_year) @@ -222,13 +228,16 @@ def main(): # TODO: This can be parallelized gdf = models.build_points(event_perimeters, shape_file_path=shape_file) + # add in the fire attributes gdf = models.add_fire_attributes(gdf) + # add in the ecoregion information: + gdf = models.add_eco_region_attributes(gdf, eco_region_type, eco_region_level) + # add in the landcover if land_cover_type != LandCoverType.NONE: gdf = models.add_land_cover_attributes(gdf, land_cover_type) gdf = models.process_geometry(gdf) - gdf.to_file('temp_test.gpkg', driver="GPKG") - gdf = models.add_eco_region_attributes(gdf, eco_region_type, eco_region_level) + # gdf.to_file('temp_test.gpkg', driver="GPKG") # testing def generate_path(proj_dir, base_filename, shape_type: ShapeType): """Generate the appropriate file path.""" diff --git a/data/params.txt b/data/params.txt index 5c75715..111d373 100644 --- a/data/params.txt +++ b/data/params.txt @@ -1,14 +1,14 @@ out_dir,Where would you like the output for this run saved?,str,output,none full_csv,Enter (true y yes) if you want a full csv. 
Enter (false n no) if you want a raw csv,bool,True,true|yes|y|false|no|n -tile_choice,"Would you like the fired product on a) continent b) country c) US state or d) specific MODIS tiles?",str,b,a|b|c|d -tile_name,f,str,conus_ak,none -eco_region_type,Please enter the eco_region type,str,world,world|na -daily,Enter (true y yes) if you want to create the daily polygons or (false n no) for just the event-level perimeter for your analysis area,bool,True,true|yes|y|false|no|n +tile_choice,"Would you like the fired product on a) continent b) country c) US state or d) specific MODIS tiles?",str,d,a|b|c|d +tile_name,f,str,h09v05,none +eco_region_type,Please enter the eco_region type,str,na,world|na +daily,Enter (true y yes) if you want to create the daily polygons or (false n no) for just the event-level perimeter for your analysis area,bool,False,true|yes|y|false|no|n spatial,fPlease enter the number of cells (~463 m resolution) to search for neighboring burn detections in all directions,int,5,none temporal,The number of days between events to search for neighboring burn detections,int,11,none -shape_type,Specify the format of the shapefile you want. Can be 'shp' 'gpkg' or 'both'.,str,both,gpkg|shp|both|none +shape_type,Specify the format of the shapefile you want. Can be 'shp' 'gpkg' or 'both'.,str,gpkg,gpkg|shp|both|none land_cover_type,If you would like to include land cover as an attribute enter the land cover type number you would like to use.\nAvailable land cover categories: \n 0: None \n 1: IGBP global vegetation classification scheme \n 2: University of Maryland (UMD) scheme \n 3: MODIS-derived LAI/fPAR scheme \n 4: MODIS-derived Net Primary Production (NPP) scheme \n 5: Plant Functional Types (PFT) scheme \n,int,1,0|1|2|3|4|5 -username,Enter your NASA Earthdata username,str,erve3705,none +username,Enter your NASA Earthdata username,str,maco4303,none password,Enter your NASA Earthdata password,str,,none start_year,Enter the year you want to start or 0 for all dates,int,0,none end_year,Enter the year you want to end or 0 for all dates,int,0,none diff --git a/src/data_classes.py b/src/data_classes.py index b49a468..21c2fca 100644 --- a/src/data_classes.py +++ b/src/data_classes.py @@ -23,9 +23,14 @@ from tqdm import tqdm import requests from bs4 import BeautifulSoup +from urllib.error import HTTPError, URLError from src.enums import LandCoverType +import warnings +from rasterio.errors import NotGeoreferencedWarning +warnings.filterwarnings("ignore", category=NotGeoreferencedWarning) + PROJECT_DIR = os.path.dirname(os.path.dirname(__file__)) logging.basicConfig(filename='app.log', level=logging.ERROR, @@ -204,32 +209,36 @@ def _verify_hdf_file(file_path) -> bool: return True def _download_task(self, request: Tuple[str, str]): - link = request[0] - dest = request[1] - - print(link, dest) + url, dest = request if os.path.exists(dest): return - pm = urllib.request.HTTPPasswordMgrWithDefaultRealm() - pm.add_password(None, "https://urs.earthdata.nasa.gov", self._username, self._password) - cookie_jar = CookieJar() - opener = urllib.request.build_opener( - urllib.request.HTTPBasicAuthHandler(pm), - urllib.request.HTTPCookieProcessor(cookie_jar) - ) - urllib.request.install_opener(opener) - myrequest = urllib.request.Request(link) - response = urllib.request.urlopen(myrequest) - response.begin() - with open(dest, 'wb') as fd: - while True: - chunk = response.read() - if chunk: - fd.write(chunk) - else: - break + session = requests.Session() + session.auth = (self._username, self._password) + 
session.headers.update({ + "User-Agent": "firedpy/1.0 (https://github.com/earthlab/firedpy/)", + "Accept": "*/*" + }) + + try: + # Get file with redirect and auth handling + response = session.get(url, stream=True, timeout=60) + if response.status_code == 401: + print(f"🔐 Unauthorized for {url}") + response.raise_for_status() + + # Write to disk + os.makedirs(os.path.dirname(dest), exist_ok=True) + with open(dest, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + + except requests.exceptions.HTTPError as e: + print(f"HTTPError for {url}: {e.response.status_code} {e.response.reason}") + except Exception as e: + print(f"Unexpected error for {url}: {str(e)}") @staticmethod def get_all_available_tiles() -> List[str]: @@ -246,17 +255,16 @@ def get_all_available_tiles() -> List[str]: def _download_files(self, download_requests): try: with Pool(self._parallel_cores - 1) as pool: - for _ in tqdm(pool.imap_unordered(self._download_task, download_requests), - total=len(download_requests)): - pass - + pool.map(self._download_task, download_requests) except Exception as pe: try: - _ = [self._download_task(q) for q in tqdm(download_requests, position=0, file=sys.stdout)] + tqdm.write("Parallel download failed, falling back to sequential...") + for q in download_requests: + self._download_task(q) except Exception as e: template = "Download failed: error type {0}:\n{1!r}" message = template.format(type(e).__name__, e.args) - print(message) + tqdm.write(message) def _get_available_year_paths(self, start_year: int = None, end_year: int = None) -> List[str]: # Get available years @@ -297,12 +305,15 @@ def _get_available_files(self, year_path: str, tiles: List[str] = None): class BurnData(LPDAAC): def __init__(self, out_dir: str, username: str, password: str, n_cores: int = None): super().__init__(out_dir) - self._lp_daac_url = 'https://e4ftl01.cr.usgs.gov/MOTA/MCD64A1.061/' - self._base_sftp_folder = os.path.join('data', 'MODIS', 'C61', 'MCD64A1', 'HDF') + self._lp_daac_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/MCD64A1.061/' + # self._base_sftp_folder = os.path.join('data', 'MODIS', 'C61', 'MCD64A1', 'HDF') self._modis_template_path = os.path.join(out_dir, 'rasters', 'mosaic_template.tif') self._record_start_year = 2000 self._parallel_cores = n_cores if n_cores is not None else os.cpu_count() - 1 - self._file_regex = r'MCD64A1' + self._post_regex + self._file_regex = ( + r'MCD64A1\.A(?P<year>\d{4})(?P<day>\d{3})\.'
+ r'h\d{2}v\d{2}\.061\.\d{13}\.hdf' + ) self._username = username self._password = password @@ -312,58 +323,120 @@ def _generate_local_hdf_dir(self, tile: str) -> str: def _generate_local_hdf_path(self, tile: str, hdf_name: str) -> str: return os.path.join(self.hdf_dir, tile, hdf_name) - def _create_requests(self, available_year_paths: List[str], tiles: List[str]): - download_requests = [] - for year_path in available_year_paths: - - available_files = self._get_available_files(year_path, tiles=tiles) - for file in available_files: - match = re.match(self._file_regex, file) - tile = self._generate_tile(match.groupdict()) - local_file_path = self._generate_local_hdf_path(tile, file) - os.makedirs(os.path.dirname(local_file_path), exist_ok=True) - if os.path.exists(local_file_path): - if not self._verify_hdf_file(local_file_path): - print('Removing', local_file_path) - os.remove(local_file_path) - else: + def _extract_year_from_granule(self, granule_id: str) -> str: + return granule_id.split('.')[1][1:5] + + def _query_cmr_granules(self, tiles: List[str]) -> Dict[str, List[str]]: + """ + Method for generating a list of granule IDs based on MODIS tiles. + :param tiles: + :return: Dictionary of granule IDs grouped by year + """ + if tiles is None: + tiles = self.get_all_available_tiles() + + cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json" + short_name = "MCD64A1" + version = "061" + + tile_patterns = [f"h{tile[1:3]}v{tile[4:]}" for tile in tiles] + + params = { + "short_name": short_name, + "version": version, + "provider": "LPCLOUD", + "page_size": 2000, + "page_num": 1, + } + + granules_by_year = {} + while True: + response = requests.get(cmr_url, params=params) + response.raise_for_status() + items = response.json()["feed"]["entry"] + if not items: + break + + for item in items: + granule_id = item["title"] # e.g. MCD64A1.A2000121.h09v05.061.2021200000000 + if any(tile in granule_id for tile in tile_patterns): + year = granule_id.split('.')[1][1:5] + + # Get the proper HTTPS link to the HDF file + links = item.get("links", []) + hrefs = [l["href"] for l in links if "data#" in l["rel"] and l["href"].endswith(".hdf")] + if not hrefs: continue - download_requests.append( - (urllib.parse.urljoin(urllib.parse.urljoin(self._lp_daac_url, year_path), file), local_file_path) - ) + + url = hrefs[0] + filename = os.path.basename(url) + + granules_by_year.setdefault(year, []).append((url, filename)) + + params["page_num"] += 1 + + return granules_by_year + + def _create_requests(self, granule_entries: List[Tuple[str, str]]): + download_requests = [] + for url, filename in granule_entries: + tile = filename.split('.')[2] + local_file_path = self._generate_local_hdf_path(tile, filename) + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + + if os.path.exists(local_file_path): + if not self._verify_hdf_file(local_file_path): + print('Removing', local_file_path) + os.remove(local_file_path) + else: + continue + + download_requests.append((url, local_file_path)) return download_requests def get_burns(self, tiles: List[str], start_year: int = None, end_year: int = None): """ - This will download the MODIS burn event data set tiles and create a + Method for downloading the MODIS burn event data set tiles, creating a singular mosaic to use as a template file for coordinate reference - information and geometries. 
- - User manual: - http://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.2.pdf - - Update 02/2021 -> fuoco server transitioned to SFTP Dec 2020 - Update src to use Paramiko SSHClient / SFTPClient - Server-side changes are described in the user manual linked above + information and geometries + :param tiles: + :param start_year: + :param end_year: + :return: + """ - SFTP: - sftp://fire:burnt@fuoco.geog.umd.edu/gfed4/MCD64A1/C6/ - username: fire - password: burnt + print(f"Querying MCD64A1 granules for [{len(tiles)}] tile(s) ...") - """ - # Check into the UMD SFTP fuoco server using Paramiko for tile in tiles: nc_file_name = self._generate_local_nc_path(tile) if os.path.exists(nc_file_name): + print("\tBurn data NetCDF already exists, skipping") continue - print('Finding available files...') - available_year_paths = self._get_available_year_paths(start_year=start_year, end_year=end_year) - download_requests = self._create_requests(available_year_paths, [tile]) - print(download_requests) - self._download_files(download_requests) + granules_by_year = self._query_cmr_granules([tile]) + + # Flatten granule list and filter by year + all_granules = [] + for year, granules in granules_by_year.items(): + if start_year and int(year) < start_year: + continue + if end_year and int(year) > end_year: + continue + all_granules.extend(granules) + + # Create download requests + download_requests = self._create_requests(all_granules) + + # tqdm.write(f"Downloading {len(download_requests)} granules for tile {tile}...") + with tqdm(total=len(download_requests), desc=f"Downloading {tile}", position=0, dynamic_ncols=True) as pbar: + for i, req in enumerate(download_requests): + try: + self._download_task(req) + except Exception as e: + tqdm.write(f"Failed to download {req[0]}: {str(e)}") + pbar.update(1) + self._write_ncs([tile]) def _write_modis_template_file(self): @@ -408,19 +481,23 @@ def _write_ncs(self, tiles: List[str]): """ # Build the net cdfs here fill_value = -9999 + # loop through tile IDs (MODIS tile) for tile_id in tiles: try: nc_file_name = self._generate_local_nc_path(tile_id) if os.path.exists(nc_file_name): continue + print("nc_file_name: ",nc_file_name) hdf_dir = self._generate_local_burn_hdf_dir(tile_id) + print("hdf_dir: ",hdf_dir) files = sorted([os.path.join(hdf_dir, f) for f in os.listdir(hdf_dir) if self._extract_date_parts(f) is not None], key=self._extract_date_parts) if not files: print(f'No hdf files for tile {tile_id} in {hdf_dir}') + print(f"Processing [{len(files)}] hdf files.") # Skip if it exists already if os.path.exists(nc_file_name): @@ -597,12 +674,14 @@ def _verify_hdf_file(file_path) -> bool: class LandCover(LPDAAC): def __init__(self, out_dir: str, n_cores: int = None, username: str = None, password: str = None): super().__init__(out_dir) - self._lp_daac_url = 'https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.061/' - self._date_regex = r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})\/' + # self._lp_daac_url = 'https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.061/' # doesn't work? 
Use below (MC, July 25) + self._lp_daac_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/MCD12Q1.061/' + # self._date_regex = r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})\/' # this is no longer part of the URL self._parallel_cores = n_cores if n_cores is not None else os.cpu_count() - 1 self._username = username self._password = password - self._file_regex = r'MCD12Q1' + self._post_regex + # self._file_regex = r'MCD12Q1' + self._post_regex # this no longer meets the file convention (MC, July 25) + self._file_regex = r'MCD12Q1\.A\d{7}\.h\d{2}v\d{2}\.061\.\d{13}\.hdf' def _generate_local_hdf_path(self, year: str, remote_name: str) -> str: return os.path.join(self._land_cover_dir, year, remote_name) @@ -610,54 +689,99 @@ def _generate_local_hdf_path(self, year: str, remote_name: str) -> str: def _generate_local_hdf_dir(self, year: str) -> str: return os.path.join(self._land_cover_dir, year) - def _create_requests(self, available_year_paths: List[str], tiles: List[str]): + def _query_cmr_granules(self, tiles: List[str]) -> Dict[str, List[str]]: + """ + Method for generating a list of granule IDs based on MODIS tiles. + :param tiles: + :return: Dictionary of granule IDs grouped by year + """ + if tiles is None: + tiles = self.get_all_available_tiles() + + # define the CMR URL and inputs + cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json" + short_name = "MCD12Q1" + version = "061" + + tile_patterns = [f"h{tile[1:3]}v{tile[4:]}" for tile in tiles] # e.g., 'h10v09' + + params = { + "short_name": short_name, + "version": version, + "provider": "LPCLOUD", + "page_size": 2000, + "page_num": 1, + } + + granules_by_year = {} + while True: + response = requests.get(cmr_url, params=params) + response.raise_for_status() + items = response.json()["feed"]["entry"] + if not items: + break + for item in items: + granule_id = item["title"] + if any(tile in granule_id for tile in tile_patterns): + year = granule_id.split('.')[1][1:5] + granules_by_year.setdefault(year, []).append(granule_id) + params["page_num"] += 1 + + return granules_by_year + + def _create_requests(self, granules: List[str]): download_requests = [] - for year_path in available_year_paths: - year = re.match(self._date_regex, year_path).groupdict()['year'] - - available_files = self._get_available_files(year_path, tiles=tiles) - for file in available_files: - local_file_path = self._generate_local_hdf_path(year, file) - os.makedirs(os.path.dirname(local_file_path), exist_ok=True) - if os.path.exists(local_file_path): - if not self._verify_hdf_file(local_file_path): - print('Removing', local_file_path) - os.remove(local_file_path) - else: - continue - download_requests.append( - (urllib.parse.urljoin(urllib.parse.urljoin(self._lp_daac_url, year_path), file), local_file_path) - ) + for granule in granules: + year = granule.split('.')[1][1:5] + remote_file = f"{granule}.hdf" + local_file_path = self._generate_local_hdf_path(year, remote_file) + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + + if os.path.exists(local_file_path): + if not self._verify_hdf_file(local_file_path): + print('Removing corrupt file:', local_file_path) + os.remove(local_file_path) + else: + continue + + url = f"{self._lp_daac_url}{granule}/{remote_file}" + download_requests.append((url, local_file_path)) return download_requests def _create_annual_mosaic(self, year: str, land_cover_type: LandCoverType = LandCoverType.IGBP): + """ + Create annual mosaic landcover geotiffs from the downloaded HDF files. 
+ :param year: + :param land_cover_type: User-defined landcover type (IGBP default) + :return: Annual mosaic GeoTIFF + """ + # define the output geotiff file name output_file = f"lc_mosaic_{land_cover_type.value}_{year}.tif" - - # Filter available files for the requested tiles + # gather a list of the granules lc_files = [self._generate_local_hdf_path(year, f) for f in os.listdir(self._generate_local_hdf_dir(year)) if re.match(self._file_regex, os.path.basename(f)) is not None] - # Use the sub-dataset name to get the right land cover type + # extract the correct subdatasets from the HDF file + # here we find the correct landcover model datasets = [] for lc_file_path in lc_files: with rasterio.open(lc_file_path) as lc_file: datasets.append([sd for sd in lc_file.subdatasets if str(land_cover_type.value) in sd.lower()][0]) - # Create pointers to the chosen land cover type + # open the raster datasets and grab metadata tiles = [rasterio.open(ds) for ds in datasets] - - # Mosaic them together mosaic, transform = merge(tiles) - - # Get coordinate reference information + # copy the metadata and update projection information crs = tiles[0].meta.copy() - crs.update({"driver": "GTIFF", - "height": mosaic.shape[1], - "width": mosaic.shape[2], - "transform": transform}) + crs.update( + {"driver": "GTiff", + "height": mosaic.shape[1], + "width": mosaic.shape[2], + "transform": transform} + ) - # Save mosaic file + # write out the mosaic file. with rasterio.open(os.path.join(self._mosaics_dir, output_file), "w+", **crs) as dst: dst.write(mosaic) @@ -679,27 +803,31 @@ def get_land_cover(self, tiles: List[str] = None, land_cover_type: LandCoverType aster-ultimate-2018-winter-olympics-observer/. Update 10/2020: Python workflow updated to 3.X specific per documentation - + Update 07/2025: Integration with the CMR API for granule lists by year (M. 
Cook) """ if tiles is None: tiles = self.get_all_available_tiles() - print('Finding available files...') - available_year_paths = self._get_available_year_paths() + print("Querying MCD12Q1 granules ...") + granules_by_year = self._query_cmr_granules(tiles) + years = sorted(granules_by_year.keys()) - for year_path in tqdm(available_year_paths, position=0, file=sys.stdout): - year = re.match(self._date_regex, year_path).groupdict()['year'] - output_file = f"lc_mosaic_{land_cover_type.value}_{year}.tif" - if os.path.exists(os.path.join(self._mosaics_dir, output_file)): - continue - download_requests = self._create_requests([year_path], tiles) - self._download_files(download_requests) - self._create_annual_mosaic(year, land_cover_type) + with tqdm(total=len(years), desc="", dynamic_ncols=True) as pbar: + for year in years: + pbar.set_description_str(f"Downloading and mosaicking granules for [{year}]") + output_file = f"lc_mosaic_{land_cover_type.value}_{year}.tif" + if os.path.exists(os.path.join(self._mosaics_dir, output_file)): + pbar.update(1) + continue - print("Mosaicking/remosaicking land cover tiles...") + granules = granules_by_year[year] + download_requests = self._create_requests(granules) + self._download_files(download_requests) + self._create_annual_mosaic(str(year), land_cover_type) + pbar.update(1) - # Print location - print(f"Land cover data saved to {self._mosaics_dir}") + tqdm.write(f"\tCompleted for all years: [{min(years)}-{max(years)}]") + tqdm.write(f"\tLandcover data saved to: {self._mosaics_dir}") class EcoRegion(Base): diff --git a/src/model_classes.py b/src/model_classes.py index 5440a15..cb10cd8 100644 --- a/src/model_classes.py +++ b/src/model_classes.py @@ -276,7 +276,11 @@ def _get_available_cells(self): return list(zip(locs[0], locs[1])) - def get_event_perimeters(self, progress_position: int = 0, progress_description: str = '', all_t: bool = False): + def get_event_perimeters(self, + progress_position: int = 0, + progress_description: str = '', + show_progress: bool = True, + all_t: bool = False): """ Iterate through each cell in the 3D MODIS Burn Date tile and group it into fire events using the space-time window. 
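Both of the new _query_cmr_granules methods in src/data_classes.py rely on the same CMR paging behaviour: request page_num 1, 2, ... until CMR returns an empty page of entries. A minimal standalone sketch of that pattern follows; the helper name is illustrative, and the default product/provider values simply mirror the ones used in the code above.

import requests

def list_granule_titles(short_name="MCD64A1", version="061", provider="LPCLOUD"):
    # Page through CMR granule search results until an empty page is returned
    cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json"
    params = {"short_name": short_name, "version": version,
              "provider": provider, "page_size": 2000, "page_num": 1}
    titles = []
    while True:
        response = requests.get(cmr_url, params=params)
        response.raise_for_status()
        entries = response.json()["feed"]["entry"]
        if not entries:
            break
        # titles look like 'MCD64A1.A2000121.h09v05.061.2021200000000'
        titles.extend(entry["title"] for entry in entries)
        params["page_num"] += 1
    return titles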
@@ -289,7 +293,14 @@ def get_event_perimeters(self, progress_position: int = 0, progress_description: time_index_buffer = max(1, self.temporal_param // 30) - for pair in tqdm(available_pairs, position=progress_position, file=sys.stdout, desc=progress_description): + iterator = available_pairs + if show_progress: + iterator = tqdm(available_pairs, + position=progress_position, + file=sys.stdout, + desc=progress_description) + + for pair in iterator: y, x = pair top, bottom, left, right, center, origin, is_edge = self._get_spatial_window(y, x, dims) center_y, center_x = center @@ -546,12 +557,12 @@ def merge_fire_edge_events(self, edge_events: List[EventPerimeter]) -> List[Even groups = self.group_by_t(self.group_by_y(self.group_by_x(edge_events))) merged_events = [] - for i, group in enumerate(groups): + for group in tqdm(groups, desc='Merging edge events', file=sys.stdout): group_array, coordinates = self._create_event_grid_array(group) event_grid = EventGrid(out_dir=self._out_dir, input_array=group_array, coordinates=coordinates) - perimeters = event_grid.get_event_perimeters(progress_position=i, - progress_description=f'Merging edge tiles for group {i} of' - f' {len(groups)}', all_t=True) + + perimeters = event_grid.get_event_perimeters(all_t=True, show_progress=False) + del event_grid, group_array, coordinates, group merged_events.extend(perimeters) @@ -563,7 +574,7 @@ def build_events(self): them all together for a seamless set of wildfire events. """ - print('Building fire event perimeters') + print('\nBuilding fire event perimeters ...') fire_events = [] @@ -577,6 +588,7 @@ def build_events(self): results = pool.imap_unordered(_process_file_perimeter, args) # As each file is processed, results will be appended to the fire_events list + results = list(pool.imap_unordered(_process_file_perimeter, args)) for result in results: fire_events.extend(result) @@ -589,6 +601,7 @@ def build_events(self): edge_event.compute_min_max() merged_edges = self.merge_fire_edge_events(edge_events) + print("\n") del edge_events @@ -598,6 +611,8 @@ def build_events(self): last_non_edge_id += 1 fire_events = non_edge + merged_edges + + del results gc.collect() return fire_events @@ -621,7 +636,7 @@ def build_points(self, event_perimeter_list: List[EventPerimeter], shape_file_pa df["y"] = df["y"] + (self.geom[-1] / 2) # Each entry gets a point object from the x and y coordinates. 
- print("Converting data frame to spatial object...") + print("Converting data frame to spatial object...\n") df["geometry"] = df[["x", "y"]].apply(lambda x: Point(tuple(x)), axis=1) gdf = gpd.GeoDataFrame(df, crs=self.crs.proj4, geometry=df["geometry"]) @@ -735,7 +750,7 @@ def point_query(row): sgdfs = [] for year in tqdm(burn_years, position=0, file=sys.stdout): - sgdf = gdf[gdf['ig_year'] == year] + sgdf = gdf[gdf['ig_year'] == year].copy() # Now set year one back for land_cover year = year - 1 @@ -762,7 +777,6 @@ def point_query(row): gdf['lc_type'] = lc_descriptions[land_cover_type] gdf.rename({'lc_description': 'lc_desc'}, inplace=True, axis='columns') - print('C') return gdf def _add_attributes_from_na_cec(self, gdf, eco_region_level): @@ -846,7 +860,7 @@ def add_eco_region_attributes(self, gdf: gpd.GeoDataFrame, eco_region_type: EcoR return gdf def process_geometry(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - print("Creating buffer...") + print("\nCreating pixel geometry (buffer)...") geometry = gdf.buffer(1 + (self._res / 2)) gdf["geometry"] = geometry gdf["geometry"] = gdf.envelope @@ -862,19 +876,19 @@ def _create_did_column(df, columns): def process_daily_data(self, gdf: gpd.GeoDataFrame, output_csv_path: str, daily_shp_path: str, daily_gpkg_path: str) -> gpd.GeoDataFrame: - print("Dissolving polygons...") + print("Creating daily polygons (dissolving) ...") gdf = self._create_did_column(gdf, ['date', 'id']) gdfd = gdf.dissolve(by="did", as_index=False) - print("Converting polygons to multipolygons...") + # print("Converting polygons to multipolygons...") gdfd["geometry"] = gdfd["geometry"].apply(self._as_multi_polygon) self.save_data(gdfd, daily_shp_path, daily_gpkg_path, output_csv_path) gdf = gdfd.drop(['did', 'pixels', 'date', 'event_day', 'dy_ar_km2'], axis=1) gdf = gdf.dissolve(by="id", as_index=False) - print("Calculating perimeter lengths...") + # print("Calculating perimeter lengths...") gdf["tot_perim"] = gdf["geometry"].length gdf["geometry"] = gdf["geometry"].apply(self._as_multi_polygon) @@ -887,11 +901,11 @@ def process_event_data(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: if 'fid' in gdf.columns: gdf = gdf.drop(['fid']) - print("Dissolving polygons...") + # print("Dissolving polygons...") gdf = gdf.dissolve(by="id", as_index=False) - print("Calculating perimeter lengths...") + # print("Calculating perimeter lengths...") gdf["tot_perim"] = gdf["geometry"].length - print("Converting polygons to multipolygons...") + # print("Converting polygons to multipolygons...") gdf["geometry"] = gdf["geometry"].apply(self._as_multi_polygon) return gdf @@ -910,14 +924,16 @@ def save_event_data(self, gdf: gpd.GeoDataFrame, output_csv_path: str, event_sha @staticmethod def save_data(gdf: gpd.GeoDataFrame, shape_path: str = None, gpkg_path: str = None, csv_path: str = None): if csv_path: + print('\nWriting tabular fire events (csv) ...') gdf.to_csv(csv_path, index=False) if gpkg_path is not None: + print('\nWriting spatial fire events ...') gdf.to_file(gpkg_path, driver="GPKG") print("Saving file to " + gpkg_path) if shape_path is not None: - print('Writing shape file') + print('\nWriting spatial fire events ...') if 'date' in gdf.columns: gdf['date'] = [str(d) for d in gdf['date']] gdf['ig_date'] = [str(d) for d in gdf['ig_date']] diff --git a/tests/test_earthdata.py b/tests/test_earthdata.py new file mode 100644 index 0000000..44e8953 --- /dev/null +++ b/tests/test_earthdata.py @@ -0,0 +1,38 @@ +""" +Testing access to NASA's earth data +""" + +from http.cookiejar 
import CookieJar +import urllib.request +import os + +def test_earthdata_credentials(username: str, password: str) -> None: + # Earthdata Login + # test url for correct user/password + root_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/MCD12Q1.061/' + url = root_url + 'MCD12Q1.A2019001.h10v09.061.2022169160720/BROWSE.MCD12Q1.A2019001.h10v09.061.2022169160720.1.jpg' + # the below url does not work as of July 2025 + # url = "https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.061/2019.01.01/BROWSE.MCD12Q1.A2019001.h10v09.061.2022169160720.1.jpg" + + password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm() + password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password) + # Create a cookie jar for storing cookies. This is used to store and return + # the session cookie given to use by the data server (otherwise it will just + # keep sending us back to Earthdata Login to authenticate). Ideally, we + # should use a file based cookie jar to preserve cookies between runs. This + # will make it much more efficient. + cookie_jar = CookieJar() + # Install all the handlers. + opener = urllib.request.build_opener( + urllib.request.HTTPBasicAuthHandler(password_manager), + # urllib.request.HTTPHandler(debuglevel=1), # Uncomment these two lines to see + # urllib.request.HTTPSHandler(debuglevel=1), # details of the requests/responses + urllib.request.HTTPCookieProcessor(cookie_jar)) + urllib.request.install_opener(opener) + + request = urllib.request.Request(url) + urllib.request.urlopen(request) + +# test the function; read credentials from the environment instead of hard-coding them +if __name__ == '__main__': + un = os.environ.get('EARTHDATA_USERNAME') # assumed env var name + pw = os.environ.get('EARTHDATA_PASSWORD') # assumed env var name + test_earthdata_credentials(un, pw) \ No newline at end of file diff --git a/tests/test_landcover_cmr_class.py b/tests/test_landcover_cmr_class.py new file mode 100644 index 0000000..33ec131 --- /dev/null +++ b/tests/test_landcover_cmr_class.py @@ -0,0 +1,121 @@ +import os +import re +import sys +import requests +import urllib.parse +from typing import List, Dict +from http.cookiejar import CookieJar +import rasterio +from rasterio.merge import merge +from tqdm import tqdm + +# LPDAAC and LandCoverType come from the main package (assumed import paths) +from src.data_classes import LPDAAC +from src.enums import LandCoverType + +class LandCover(LPDAAC): + def __init__(self, out_dir: str, n_cores: int = None, username: str = None, password: str = None): + super().__init__(out_dir) + self._lp_daac_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/MCD12Q1.061/' + self._parallel_cores = n_cores if n_cores is not None else os.cpu_count() - 1 + self._username = username + self._password = password + self._file_regex = r'MCD12Q1\.A\d{7}\.h\d{2}v\d{2}\.061\.\d{13}\.hdf' + + def _generate_local_hdf_path(self, year: str, remote_name: str) -> str: + return os.path.join(self._land_cover_dir, year, remote_name) + + def _generate_local_hdf_dir(self, year: str) -> str: + return os.path.join(self._land_cover_dir, year) + + def _extract_year_from_granule(self, granule_id: str) -> str: + return granule_id.split('.')[1][1:5] + + def _query_cmr_granules(self, tiles: List[str]) -> Dict[str, List[str]]: + cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json" + short_name = "MCD12Q1" + version = "061" + + tile_patterns = [f"h{tile[1:3]}v{tile[4:]}" for tile in tiles] # e.g., 'h10v09' + + params = { + "short_name": short_name, + "version": version, + "provider": "LPCLOUD", + "page_size": 2000, + "page_num": 1, + } + + granules_by_year = {} + while True: + response = requests.get(cmr_url, params=params) + response.raise_for_status() + items = response.json()["feed"]["entry"] + if not items:
+ break + for item in items: + granule_id = item["title"] + if any(tile in granule_id for tile in tile_patterns): + year = self._extract_year_from_granule(granule_id) + granules_by_year.setdefault(year, []).append(granule_id) + params["page_num"] += 1 + + return granules_by_year + + def _create_requests(self, granules: List[str]): + download_requests = [] + for granule in granules: + year = self._extract_year_from_granule(granule) + remote_file = f"{granule}.hdf" + local_file_path = self._generate_local_hdf_path(year, remote_file) + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + + if os.path.exists(local_file_path): + if not self._verify_hdf_file(local_file_path): + print('Removing corrupt file:', local_file_path) + os.remove(local_file_path) + else: + continue + + url = f"{self._lp_daac_url}{granule}/{remote_file}" + download_requests.append((url, local_file_path)) + + return download_requests + + def _create_annual_mosaic(self, year: str, land_cover_type: LandCoverType = LandCoverType.IGBP): + output_file = f"lc_mosaic_{land_cover_type.value}_{year}.tif" + lc_files = [self._generate_local_hdf_path(year, f) for f in os.listdir(self._generate_local_hdf_dir(year)) + if re.match(self._file_regex, os.path.basename(f)) is not None] + + datasets = [] + for lc_file_path in lc_files: + with rasterio.open(lc_file_path) as lc_file: + datasets.append([sd for sd in lc_file.subdatasets if str(land_cover_type.value) in sd.lower()][0]) + + tiles = [rasterio.open(ds) for ds in datasets] + mosaic, transform = merge(tiles) + + crs = tiles[0].meta.copy() + crs.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "transform": transform}) + + with rasterio.open(os.path.join(self._mosaics_dir, output_file), "w+", **crs) as dst: + dst.write(mosaic) + + def get_land_cover(self, tiles: List[str] = None, land_cover_type: LandCoverType = LandCoverType.IGBP): + if tiles is None: + tiles = self.get_all_available_tiles() + + print("Querying granules across all years...") + granules_by_year = self._query_cmr_granules(tiles) + + for year in tqdm(sorted(granules_by_year.keys()), desc="Processing years", dynamic_ncols=True): + tqdm.write(f"Downloading data for {year}...") + granules = granules_by_year[year] + output_file = f"lc_mosaic_{land_cover_type.value}_{year}.tif" + if os.path.exists(os.path.join(self._mosaics_dir, output_file)): + continue + + download_requests = self._create_requests(granules) + self._download_files(download_requests) + + tqdm.write(f"Mosaicking land cover tiles for {year}...") + self._create_annual_mosaic(str(year), land_cover_type) + + print(f"Land cover data saved to {self._mosaics_dir}")
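For completeness, here is one way the class defined in this test file could be exercised by hand. This is a minimal sketch, not part of the PR: the environment-variable names (EARTHDATA_USERNAME / EARTHDATA_PASSWORD), the output directory, and the MODIS tile are illustrative choices, while the call signatures mirror the code above.

import os

if __name__ == "__main__":
    lc = LandCover(
        out_dir="output",                               # example output directory
        n_cores=2,
        username=os.environ.get("EARTHDATA_USERNAME"),  # assumed env var name
        password=os.environ.get("EARTHDATA_PASSWORD"),  # assumed env var name
    )
    # Query CMR for MCD12Q1 granules on one tile, download them, and build annual IGBP mosaics
    lc.get_land_cover(tiles=["h09v05"], land_cover_type=LandCoverType.IGBP)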