Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions Processing_scores/FATHOM_GFDRR_ThinkHazard.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Flood summaries for ThinkHazard\n",
"\n",
"This script performs flood hazard ranking by administrative unit using global-extent\n",
"Fathom tiles hosted on AWS S3, rather than country-extent locally downloaded data.\n",
"\n",
"The hazard ranking is based on:\n",
"- Value threshold: Minimum flood depth (cm) to consider (50 cm)\n",
"- Area threshold: Minimum percentage of area affected (3%)\n",
"- Hazard score: Count of return periods meeting BOTH thresholds (0-4)\n",
"\n",
"**CORRECTED VERSION**: Fixed windowing bugs and added proper hazard score calculation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports and session setup\n",
"import os, time, io, json, sys\n",
"import urllib3\n",
"import boto3\n",
"import rasterio  # FIXED: was missing; rasterio.Env / rasterio.open are used in later cells\n",
"\n",
"import geopandas as gpd\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from functools import reduce\n",
"from urllib3.exceptions import InsecureRequestWarning\n",
"from botocore import UNSIGNED\n",
"from botocore.config import Config\n",
"from tqdm.notebook import tqdm\n",
"\n",
"# Import helper functions -- presumably provides calculate_think_hazard_score and\n",
"# map_flood used below; TODO(review): prefer explicit imports over `*`\n",
"from gfdrr_helper import *\n",
"\n",
"# SSL verification is disabled for the S3 endpoint (verify=False below),\n",
"# so silence the resulting InsecureRequestWarning noise\n",
"urllib3.disable_warnings(InsecureRequestWarning)\n",
"\n",
"def tPrint(s):\n",
"    \"\"\"prints the time along with the message\"\"\"\n",
"    print(\"%s\\t%s\" % (time.strftime(\"%H:%M:%S\"), s))\n",
"\n",
"# Anonymous (unsigned) access to the public bucket; certificate checks disabled\n",
"s3_client = boto3.client('s3', verify=False, config=Config(signature_version=UNSIGNED))\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Configuration: local output folders, S3 location of the Fathom VRTs,\n",
"# return periods, flood types, and the admin-2 boundaries to summarize\n",
"local_folder = \"C:/WBG/Work/Projects/ThinkHazard\"\n",
"out_folder = os.path.join(local_folder, \"FATHOM_summaries\")\n",
"map_folder = os.path.join(local_folder, \"FATHOM_maps\")\n",
"for folder in (out_folder, map_folder):\n",
"    os.makedirs(folder, exist_ok=True)\n",
"\n",
"vrt_folder = r\"C:\\WBG\\Work\\data\\FATHOM\"\n",
"s3_bucket = \"wbg-geography01\"\n",
"s3_prefix = \"FATHOM\"\n",
"\n",
"# Return periods to process\n",
"return_periods = [10, 100, 500, 1000]\n",
"\n",
"# (label, VRT filename pattern) per flood type; {rp} is filled with the return period\n",
"flood_files = [\n",
"    [\"FU\", \"FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-FLUVIAL-UNDEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt\"],\n",
"    [\"CU\", \"FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-COASTAL-UNDEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt\"],\n",
"    [\"PD\", \"FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt\"]\n",
"]\n",
"\n",
"# Load administrative boundaries (admin level 2; path suggests EPSG:4326)\n",
"admin_boundaries_file = r\"C:\\WBG\\Work\\data\\ADMIN\\NEW_WB_BOUNDS\\FOR_PUBLICATION\\crs_4326\\parquet\\WB_GAD_ADM2.parquet\"\n",
"inA = gpd.read_parquet(admin_boundaries_file)\n",
"\n",
"print(f\"Loaded {len(inA)} administrative units\")\n",
"print(f\"Countries: {len(inA['ISO_A3'].unique())}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Main processing: for each country, summarize flood depth per admin-2 unit for\n",
"# every flood type x return period, then score each unit by how many return\n",
"# periods exceed BOTH the depth and area thresholds.\n",
"# FIXED: cell source restored to the standard array-of-lines form (the previous\n",
"# single-string form was corrupted by an embedded raw newline).\n",
"\n",
"cur_out_folder = os.path.join(out_folder, \"FATHOM_Detailed\")\n",
"os.makedirs(cur_out_folder, exist_ok=True)\n",
"\n",
"cur_map_folder = os.path.join(map_folder, \"FATHOM_Detailed\")\n",
"os.makedirs(cur_map_folder, exist_ok=True)\n",
"\n",
"# Thresholds for hazard scoring: a return period counts only when BOTH are met\n",
"VALUE_THRESHOLD = 50   # cm - minimum mean depth\n",
"AREA_THRESHOLD = 3.0   # % - minimum area fraction\n",
"\n",
"tPrint(f\"Thresholds: Value={VALUE_THRESHOLD} cm, Area={AREA_THRESHOLD}%\")\n",
"\n",
"# GDAL_HTTP_UNSAFESSL mirrors the unverified-SSL boto3 client configured above\n",
"with rasterio.Env(GDAL_HTTP_UNSAFESSL='YES'):\n",
"    for sel_country in ['TUN', 'GEO', 'PHL']:  # inA['ISO_A3'].unique():\n",
"        all_res = []\n",
"        out_file = os.path.join(cur_out_folder, f\"FATHOM_ThinkHazard_summary_{sel_country}.csv\")\n",
"        sel_a = inA[inA['ISO_A3'] == sel_country]\n",
"\n",
"        # FJI and RUS are excluded (reason not documented here -- TODO confirm)\n",
"        if not os.path.exists(out_file) and sel_country not in [\"FJI\", \"RUS\"]:\n",
"            tPrint(f\"Processing country: {sel_country} ({len(sel_a)} units)\")\n",
"\n",
"            # Summarize every flood type at every return period\n",
"            for lbl, raster_file in flood_files:\n",
"                for return_period in return_periods:\n",
"                    tPrint(f\"Processing {lbl} for {return_period} year return period\")\n",
"                    sel_raster_file = raster_file.format(rp=return_period)\n",
"                    sel_raster = f\"s3://{s3_bucket}/{s3_prefix}/{sel_raster_file}\"\n",
"\n",
"                    # CORRECTED: Removed no_data parameter (now handled automatically with data < 0)\n",
"                    # FIXED: depth_threshold now uses VALUE_THRESHOLD instead of a duplicated literal 50\n",
"                    res_a = calculate_think_hazard_score(sel_a, sel_raster,\n",
"                                                         depth_threshold=VALUE_THRESHOLD,\n",
"                                                         idx_col='ADM2CD_c',\n",
"                                                         all_touched=True)\n",
"\n",
"                    res_a.rename(columns={\n",
"                        'frac_area_flooded': f'frac_area_flooded_{lbl}_{return_period}yr',\n",
"                        'mean_val': f'mean_val_{lbl}_{return_period}yr'\n",
"                    }, inplace=True)\n",
"                    all_res.append(res_a)\n",
"\n",
"            # Merge all per-return-period results on the admin unit id\n",
"            tPrint(\"Merging results...\")\n",
"            all_res_df = reduce(lambda left, right: pd.merge(left, right, on='ADM2CD_c', how='outer'), all_res)\n",
"\n",
"            # Hazard score per flood type: count of return periods meeting BOTH thresholds (0-4)\n",
"            tPrint(\"Calculating hazard scores...\")\n",
"            for lbl, _ in flood_files:\n",
"                # Column pairs (mean depth, % area flooded) for this flood type across all return periods\n",
"                rp_cols = [(f'mean_val_{lbl}_{rp}yr', f'frac_area_flooded_{lbl}_{rp}yr')\n",
"                           for rp in return_periods]\n",
"\n",
"                all_res_df[f'{lbl}_score'] = all_res_df.apply(\n",
"                    lambda row: sum(\n",
"                        1 for mean_col, area_col in rp_cols\n",
"                        if (row.get(mean_col, 0) >= VALUE_THRESHOLD and\n",
"                            row.get(area_col, 0) >= AREA_THRESHOLD)\n",
"                    ),\n",
"                    axis=1\n",
"                )\n",
"\n",
"                tPrint(f\"  {lbl} scores: {all_res_df[f'{lbl}_score'].value_counts().sort_index().to_dict()}\")\n",
"\n",
"            # Units absent from some layers get NaN after the outer merge -> treat as 0\n",
"            all_res_df.fillna(0, inplace=True)\n",
"\n",
"            all_res_df.to_csv(out_file, index=False)\n",
"            tPrint(f\"Saved results to {out_file}\")\n",
"\n",
"            # Map the 100-year results for a quick visual check\n",
"            # (sel_a is unchanged since the top of the loop, so no re-selection is needed)\n",
"            map_adm = pd.merge(sel_a, all_res_df, on='ADM2CD_c', how='left')\n",
"            map_file = os.path.join(cur_map_folder, f\"flood_map_{sel_country}_100yr.png\")\n",
"            map_flood(map_adm, return_period=100, out_file=map_file)\n",
"            tPrint(f\"Saved map to {map_file}\")\n",
"        else:\n",
"            if os.path.exists(out_file):\n",
"                tPrint(f\"File already exists for {sel_country}, skipping...\")\n",
"            else:\n",
"                tPrint(f\"Skipping {sel_country} (in exclusion list)\")\n",
"\n",
"tPrint(\"Processing complete!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verification: Check results for a specific country"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load and inspect results\n",
"# (requires the main processing cell above to have been run for this country)\n",
"sel_country = 'TUN'\n",
"result_file = os.path.join(cur_out_folder, f\"FATHOM_ThinkHazard_summary_{sel_country}.csv\")\n",
"\n",
"if os.path.exists(result_file):\n",
"    results = pd.read_csv(result_file)\n",
"    print(f\"Results for {sel_country}: {len(results)} units\")\n",
"    print(\"\\nColumns:\", list(results.columns))\n",
"    print(\"\\nSample results:\")\n",
"    display(results.head())\n",
"    \n",
"    # One score column per flood type: FU (fluvial undefended), CU (coastal\n",
"    # undefended), PD (pluvial defended) -- labels defined in flood_files above\n",
"    print(\"\\nHazard Score Distribution:\")\n",
"    for flood_type in ['FU', 'CU', 'PD']:\n",
"        score_col = f'{flood_type}_score'\n",
"        if score_col in results.columns:\n",
"            print(f\"\\n{flood_type}:\")\n",
"            print(results[score_col].value_counts().sort_index())\n",
"else:\n",
"    print(f\"Results file not found: {result_file}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verification: Check specific units with known values\n",
"\n",
"Expected values for Tunisia Coastal RP1000:\n",
"- TUN016015: area ~4.5% (not 1.57%)\n",
"- TUN016001: area ~2.9% (not 0.23%)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check specific units\n",
"# NOTE(review): relies on result_file defined in the previous verification cell;\n",
"# the expected percentages below come from the markdown cell above (source of\n",
"# the reference values not shown in this notebook -- verify independently)\n",
"test_units = ['TUN016015', 'TUN016001']\n",
"\n",
"if os.path.exists(result_file):\n",
"    results = pd.read_csv(result_file)\n",
"    test_results = results[results['ADM2CD_c'].isin(test_units)]\n",
"    \n",
"    if len(test_results) > 0:\n",
"        print(\"Test units - Coastal RP1000:\")\n",
"        for _, row in test_results.iterrows():\n",
"            unit = row['ADM2CD_c']\n",
"            # .get() defaults to 0 so a missing column reads as \"no flooding\"\n",
"            mean_val = row.get('mean_val_CU_1000yr', 0)\n",
"            area_pct = row.get('frac_area_flooded_CU_1000yr', 0)\n",
"            score = row.get('CU_score', 0)\n",
"            print(f\"  {unit}: mean={mean_val:.2f} cm, area={area_pct:.2f}%, score={score}\")\n",
"        \n",
"        print(\"\\nExpected:\")\n",
"        print(\"  TUN016015: area ~4.5%\")\n",
"        print(\"  TUN016001: area ~2.9%\")\n",
"    else:\n",
"        print(\"Test units not found in results\")\n",
"else:\n",
"    print(f\"Results file not found\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Optional: Extract sample data for testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Uncomment and run if you need to extract sample raster data\n",
"# NOTE(review): this snippet uses rasterio (must be imported) and the external\n",
"# GOSTrocks package from a machine-specific path -- adjust before enabling\n",
"\n",
"# sys.path.insert(0, \"C:\\\\WBG\\\\Work\\\\Code\\\\GOSTrocks\\\\src\")\n",
"# import GOSTrocks.rasterMisc as rMisc\n",
"\n",
"# temp_out_folder = \"C:/Temp/FATHOM_TUN\"\n",
"# if not os.path.exists(temp_out_folder):\n",
"#     os.makedirs(temp_out_folder)\n",
"\n",
"# sel_admin = inA.loc[inA['ISO_A3'] == \"TUN\"]\n",
"# sel_admin.to_file(os.path.join(temp_out_folder, \"TUN_admin.gpkg\"), driver=\"GPKG\")\n",
"\n",
"# for return_period in return_periods:\n",
"#     for lbl, raster_file in flood_files:\n",
"#         temp_out_file = os.path.join(temp_out_folder, f\"TUN_{lbl}_{return_period}yr.tif\")\n",
"#         if not os.path.exists(temp_out_file):\n",
"#             sel_raster_file = raster_file.format(rp=return_period)\n",
"#             sel_raster = f\"s3://{s3_bucket}/{s3_prefix}/{sel_raster_file}\"\n",
"#             with rasterio.Env(GDAL_HTTP_UNSAFESSL='YES'):\n",
"#                 inR = rasterio.open(sel_raster)\n",
"#                 rMisc.clipRaster(inR, sel_admin, temp_out_file, crop=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
80 changes: 80 additions & 0 deletions Processing_scores/TH_FL_GUI.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# **RDL - Flood Hazard Threshold Analysis**\n",
"\n",
"## Flood hazard threshold-based scoring: river, pluvial and coastal.\n",
"\n",
"<p>Based on Fathom v3 (2023) hazard data, produces hazard scores for administrative units based on value and area thresholds across multiple return periods.</p>\n",
"<p>The score represents the number of selected return periods that exceed the specified thresholds (ranges from 0 to N, where N is the number of selected return periods).</p>\n",
"<p>The tool expects Fathom v3 data as country-wide RP layers in the HZD folder. To merge downloaded tiles, use the <a href=\"F3/F3_preprocessing.ipynb\"><b>Fathom 3 pre-processing tool</b></a>.</p>\n",
"\n",
"### Cell > Run all or CTRL + Enter"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SSL verification has been disabled for this session.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "15dadae7997741b0a2c6d7249f6cd38a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='\\n <div style=\\'\\n background: linear-gradient(to bottom, #003366, transp…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"from TH_FL_utils import initialize_tool\n",
"initialize_tool()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading
Loading