diff --git a/databricks-skills/databricks-aibi-dashboards/.tests/.gitignore b/databricks-skills/databricks-aibi-dashboards/.tests/.gitignore new file mode 100644 index 00000000..97cac33a --- /dev/null +++ b/databricks-skills/databricks-aibi-dashboards/.tests/.gitignore @@ -0,0 +1 @@ +.dashboard_id diff --git a/databricks-skills/databricks-aibi-dashboards/.tests/README.md b/databricks-skills/databricks-aibi-dashboards/.tests/README.md new file mode 100644 index 00000000..dcdadd87 --- /dev/null +++ b/databricks-skills/databricks-aibi-dashboards/.tests/README.md @@ -0,0 +1,31 @@ +# Dashboard skill — manual verification harness + +This folder hosts a Databricks-Connect script that seeds the test data the +worked example in `../4-examples.md` queries from. The point is to be able +to actually render the example dashboard end-to-end and visually confirm +the skill produces a state-of-the-art result. + +## Quick start + +```bash +pip install "databricks-connect>=15.0" + +# uses your default Databricks auth (env vars or ~/.databrickscfg) +python generate_test_data.py +``` + +Defaults: writes `main.dashboard_skill_test.support_cases` with 5000 rows. +Override via `TEST_CATALOG`, `TEST_SCHEMA`, `TEST_TABLE`, `NUM_CASES`. + +The script prints the next-step commands to create the dashboard against +the generated table. + +## Why this isn't a pytest + +This is intentionally a one-shot script, not an automated test: + +- The point is **human visual verification** of the rendered dashboard + (theme coherence, layout, color semantics) — those judgments don't + belong in CI. +- The dataset takes ~10s to write and is meant to be inspected once per + meaningful change to `4-examples.md`. 
diff --git a/databricks-skills/databricks-aibi-dashboards/.tests/create_dashboard.py b/databricks-skills/databricks-aibi-dashboards/.tests/create_dashboard.py new file mode 100644 index 00000000..ae2aa3bf --- /dev/null +++ b/databricks-skills/databricks-aibi-dashboards/.tests/create_dashboard.py @@ -0,0 +1,117 @@ +""" +Create the example dashboard in the workspace from the saved JSON. + +Pairs with `generate_test_data.py` (which seeds the support_cases table). +Run that first, then this script: + + pip install "databricks-sdk>=0.30" + python create_dashboard.py + +Env vars (all optional, same defaults as generate_test_data.py): + TEST_CATALOG default: main + TEST_SCHEMA default: dashboard_skill_test + DASHBOARD_NAME default: "Dashboard skill — example (Support Operations)" + DASHBOARD_PARENT_PATH default: /Users/<your-user-name> (workspace home) + +Behavior: the id from a previous run is cached in a `.dashboard_id` file next +to this script; if that dashboard still exists it is updated in place (same +dashboard_id). Otherwise a new one is created. Either way the URL is printed. +""" + +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path + +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.dashboards import Dashboard + +CATALOG = os.environ.get("TEST_CATALOG", "main") +SCHEMA = os.environ.get("TEST_SCHEMA", "dashboard_skill_test") +DASHBOARD_NAME = os.environ.get( + "DASHBOARD_NAME", "Dashboard skill — example (Support Operations)" +) + +JSON_PATH = Path(__file__).parent / "dashboard.lvdash.json" + + +def main() -> None: + if not JSON_PATH.exists(): + sys.exit(f"Missing dashboard JSON: {JSON_PATH}") + + with JSON_PATH.open() as f: + spec = json.load(f) + + # Datasets in the saved JSON use bare `FROM support_cases`. Prefix with the + # catalog/schema the data-gen script writes to, so the dashboard binds to + # the real test table. 
+ bare = "FROM support_cases" + qualified = f"FROM {CATALOG}.{SCHEMA}.support_cases" + n_replacements = 0 + for ds in spec.get("datasets", []): + for i, line in enumerate(ds.get("queryLines", [])): + if bare in line: + ds["queryLines"][i] = line.replace(bare, qualified) + n_replacements += 1 + print(f"Prefixed {n_replacements} `FROM support_cases` references → {qualified}") + + serialized = json.dumps(spec) + + w = WorkspaceClient() + me = w.current_user.me().user_name + parent_path = os.environ.get("DASHBOARD_PARENT_PATH", f"/Users/{me}") + + # Look for a previous run by reading a sidecar id file (avoids `lakeview.list()`, + # which paginates over every dashboard the user can see and is painfully slow on + # large workspaces like field-eng). + id_file = Path(__file__).parent / ".dashboard_id" + existing_id: str | None = None + if id_file.exists(): + candidate = id_file.read_text().strip() + try: + w.lakeview.get(candidate) # verify it still exists + existing_id = candidate + except Exception: + existing_id = None # stale id, fall through to create + + if existing_id: + print(f"Updating existing dashboard {existing_id}...") + updated = w.lakeview.update( + dashboard_id=existing_id, + dashboard=Dashboard( + display_name=DASHBOARD_NAME, + serialized_dashboard=serialized, + ), + ) + dashboard_id = updated.dashboard_id + else: + print(f"Creating new dashboard in {parent_path}...") + created = w.lakeview.create( + dashboard=Dashboard( + display_name=DASHBOARD_NAME, + parent_path=parent_path, + serialized_dashboard=serialized, + ) + ) + dashboard_id = created.dashboard_id + + # Publish so it's openable via the v3 dashboard URL. + try: + w.lakeview.publish(dashboard_id=dashboard_id) + except Exception as e: + # Publishing is best-effort — if the workspace is configured to require + # manual publish, just report it instead of failing the whole run. 
+ print(f" (publish skipped: {e})") + + id_file.write_text(dashboard_id) + host = w.config.host.rstrip("/") + print() + print("Done.") + print(f" Dashboard ID: {dashboard_id}") + print(f" URL: {host}/dashboardsv3/{dashboard_id}") + + +if __name__ == "__main__": + main() diff --git a/databricks-skills/databricks-aibi-dashboards/.tests/dashboard.lvdash.json b/databricks-skills/databricks-aibi-dashboards/.tests/dashboard.lvdash.json new file mode 100644 index 00000000..a608a27d --- /dev/null +++ b/databricks-skills/databricks-aibi-dashboards/.tests/dashboard.lvdash.json @@ -0,0 +1,944 @@ +{ + "datasets": [ + { + "name": "ds_support", + "displayName": "Support cases", + "queryLines": [ + "SELECT case_id, opened_at, closed_at, priority, channel, region_name,\n", + " customer_id, reopened_flag, satisfaction_score,\n", + " customer_latitude, customer_longitude,\n", + " (unix_timestamp(closed_at) - unix_timestamp(opened_at)) / 3600.0 AS time_to_resolution_hours\n", + "FROM support_cases" + ], + "columns": [ + { + "displayName": "Total Cases", + "description": "Count of support cases", + "expression": "COUNT(`case_id`)" + }, + { + "displayName": "Avg Resolution Hours", + "description": "Mean resolution time across closed cases", + "expression": "AVG(`time_to_resolution_hours`)" + }, + { + "displayName": "Reopen Rate %", + "description": "Percent of cases reopened after closure", + "expression": "SUM(CASE WHEN `reopened_flag`=true THEN 1 ELSE 0 END) * 1.0 / COUNT(`case_id`)" + }, + { + "displayName": "Avg Satisfaction", + "description": "Average customer satisfaction (1-10)", + "expression": "AVG(`satisfaction_score`)" + }, + { + "displayName": "Priority Level", + "description": "Sortable priority label", + "expression": "CASE WHEN `priority`='Critical' THEN '1-Critical' WHEN `priority`='High' THEN '2-High' WHEN `priority`='Medium' THEN '3-Medium' ELSE '4-Low' END" + } + ] + }, + { + "name": "ds_forecast", + "displayName": "Cases forecast", + "queryLines": [ + "WITH actuals 
AS (\n", + " SELECT DATE_TRUNC('WEEK', opened_at) AS opened_at, COUNT(*) AS count\n", + " FROM support_cases\n", + " WHERE DATE_TRUNC('WEEK', opened_at) < DATE_TRUNC('WEEK', current_date())\n", + " GROUP BY 1\n", + "),\n", + "dates AS (SELECT MAX(opened_at) AS max_d, MIN(opened_at) AS min_d FROM actuals),\n", + "forecast AS (\n", + " SELECT opened_at, count_forecast, count_upper, count_lower, CAST(NULL AS BIGINT) AS count\n", + " FROM AI_FORECAST(TABLE(actuals),\n", + " horizon => (SELECT max_d + MAKE_DT_INTERVAL(CAST(FLOOR(DATEDIFF(max_d, min_d) * 0.5) AS INT), 0, 0, 0) FROM dates),\n", + " time_col => 'opened_at', value_col => 'count')\n", + "),\n", + "bridge AS (\n", + " SELECT a.opened_at, a.count AS count_forecast, a.count AS count_upper, a.count AS count_lower, a.count\n", + " FROM actuals a JOIN dates d ON a.opened_at = d.max_d\n", + ")\n", + "SELECT opened_at, CAST(NULL AS BIGINT) AS count_forecast, CAST(NULL AS BIGINT) AS count_upper, CAST(NULL AS BIGINT) AS count_lower, count FROM actuals\n", + "UNION ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM bridge\n", + "UNION ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM forecast" + ] + } + ], + "pages": [ + { + "name": "overview", + "displayName": "Overview", + "layout": [ + { + "widget": { + "name": "header", + "multilineTextboxSpec": { + "lines": [ + "# Support Operations \u2014 Post-Release Surge (4.1)\n\n**The story this week:** a clear volume spike in mid-February \u2014 the date the new Product 4.1 release went out (marked on the forecast chart). The release introduced a regression that drove a wave of Critical/High cases over the following 6 weeks: case volume jumps, average resolution time creeps up, reopen rate climbs, and customer satisfaction dips on the affected metros \u2014 visible on the satisfaction map as warmer (lower) scores. The forecast extends the trend forward so the team can size the cleanup ahead. 
Use the filters page to slice by region or resolution-time bucket to localize the impact." + ] + } + }, + "position": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + }, + { + "widget": { + "name": "kpi-total-cases", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Total Cases)", + "displayName": "Total Cases" + } + }, + "frame": { + "title": "Total Cases", + "showTitle": true + } + } + }, + "position": { + "x": 0, + "y": 3, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi-volume-trend", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "weekly(opened_at)", + "expression": "DATE_TRUNC(\"WEEK\", `opened_at`)" + }, + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + } + ], + "disaggregated": false, + "orders": [ + { + "direction": "DESC", + "expression": "DATE_TRUNC(\"WEEK\", `opened_at`)" + } + ] + } + } + ], + "spec": { + "version": 2, + "frame": { + "title": "Weekly Case Volume", + "showTitle": true + }, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Total Cases)", + "displayName": "This Week" + }, + "period": { + "fieldName": "weekly(opened_at)" + } + } + } + }, + "position": { + "x": 3, + "y": 3, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi-resolution", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Avg Resolution Hours)", + "expression": "MEASURE(`Avg Resolution Hours`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "frame": { + "title": "Avg Resolution Time", + "showTitle": true + }, + 
"widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Avg Resolution Hours)", + "formatTemplate": "{{ @formatted }} hrs", + "displayName": "Avg Hours" + } + } + } + }, + "position": { + "x": 6, + "y": 3, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi-reopen", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Reopen Rate %)", + "expression": "MEASURE(`Reopen Rate %`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "frame": { + "title": "Reopen Rate (%)", + "showTitle": true + }, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Reopen Rate %)", + "format": { + "type": "number-percent", + "decimalPlaces": { + "type": "max", + "places": 2 + } + }, + "displayName": "Reopen Rate" + } + } + } + }, + "position": { + "x": 9, + "y": 3, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "case-forecast", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_forecast", + "fields": [ + { + "name": "opened_at", + "expression": "`opened_at`" + }, + { + "name": "count", + "expression": "`count`" + }, + { + "name": "count_forecast", + "expression": "`count_forecast`" + }, + { + "name": "count_upper", + "expression": "`count_upper`" + }, + { + "name": "count_lower", + "expression": "`count_lower`" + } + ], + "disaggregated": true + } + } + ], + "spec": { + "version": 1, + "widgetType": "forecast-line", + "encodings": { + "x": { + "fieldName": "opened_at", + "scale": { + "type": "temporal" + } + }, + "y": { + "scale": { + "type": "quantitative", + "domainMin": 0 + }, + "original": { + "fieldName": "count", + "displayName": "Cases" + }, + "prediction": { + "fieldName": "count_forecast", + "displayName": "Forecast" + }, + "predictionUpper": { + "fieldName": "count_upper" + }, + "predictionLower": { + "fieldName": "count_lower" + } + } + }, + "annotations": [ + { + 
"type": "vertical-line", + "encodings": { + "x": { + "dataValue": "2026-02-16T09:00:00.000", + "dataType": "DATETIME" + }, + "label": { + "value": "Product release 4.1" + }, + "color": { + "value": { + "hex": "#FF7E5C" + } + } + } + } + ], + "frame": { + "showTitle": true, + "title": "Case Volume \u2014 actuals + forecast" + } + } + }, + "position": { + "x": 0, + "y": 6, + "width": 8, + "height": 6 + } + }, + { + "widget": { + "name": "priority-by-channel", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "count(case_id)", + "expression": "COUNT(`case_id`)" + }, + { + "name": "Priority Level", + "expression": "`Priority Level`" + }, + { + "name": "channel", + "expression": "`channel`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "frame": { + "showTitle": true, + "title": "Cases by channel \u00d7 priority" + }, + "widgetType": "heatmap", + "encodings": { + "x": { + "fieldName": "Priority Level", + "scale": { + "type": "categorical" + } + }, + "y": { + "fieldName": "channel", + "scale": { + "type": "categorical" + } + }, + "color": { + "fieldName": "count(case_id)", + "scale": { + "type": "quantitative", + "colorRamp": { + "mode": "custom-sequential", + "colors": { + "start": "#FFA600", + "end": "#995495" + } + } + } + }, + "label": { + "show": true + } + } + } + }, + "position": { + "x": 8, + "y": 17, + "width": 4, + "height": 6 + } + }, + { + "widget": { + "name": "customer-map", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Avg Satisfaction)", + "expression": "MEASURE(`Avg Satisfaction`)" + }, + { + "name": "customer_latitude", + "expression": "`customer_latitude`" + }, + { + "name": "customer_longitude", + "expression": "`customer_longitude`" + }, + { + "name": "count(*)", + "expression": "COUNT(`*`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "frame": { + 
"showTitle": true, + "title": "Customer Satisfaction Map" + }, + "mark": { + "opacity": 0.7 + }, + "widgetType": "symbol-map", + "encodings": { + "coordinates": { + "latitude": { + "fieldName": "customer_latitude" + }, + "longitude": { + "fieldName": "customer_longitude" + } + }, + "color": { + "fieldName": "measure(Avg Satisfaction)", + "scale": { + "type": "quantitative", + "colorRamp": { + "mode": "custom-sequential", + "colors": { + "start": "#FFDC00", + "end": "#995495" + } + } + } + }, + "size": { + "fieldName": "count(*)", + "scale": { + "type": "quantitative" + } + } + } + } + }, + "position": { + "x": 0, + "y": 12, + "width": 8, + "height": 5 + } + }, + { + "widget": { + "name": "resolution-distribution", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "channel", + "expression": "`channel`" + }, + { + "name": "bin(time_to_resolution_hours, binWidth=2)", + "expression": "BIN_FLOOR(`time_to_resolution_hours`, 2)" + }, + { + "name": "count(*)", + "expression": "COUNT(`*`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "frame": { + "showTitle": true, + "title": "Resolution time (hours)" + }, + "widgetType": "histogram", + "encodings": { + "x": { + "fieldName": "bin(time_to_resolution_hours, binWidth=2)", + "scale": { + "type": "quantitative", + "domain": { + "max": 175 + } + } + }, + "y": { + "fieldName": "count(*)", + "scale": { + "type": "quantitative" + } + }, + "color": { + "fieldName": "channel", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "Email", + "color": "#FF7054" + } + ] + } + } + } + } + }, + "position": { + "x": 8, + "y": 6, + "width": 4, + "height": 6 + } + }, + { + "widget": { + "name": "case-detail", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "case_id", + "expression": "`case_id`" + }, + { + "name": "opened_at", + "expression": "`opened_at`" + }, + { + 
"name": "channel", + "expression": "`channel`" + }, + { + "name": "Priority Level", + "expression": "`Priority Level`" + }, + { + "name": "time_to_resolution_hours", + "expression": "`time_to_resolution_hours`" + }, + { + "name": "satisfaction_score", + "expression": "`satisfaction_score`" + } + ], + "disaggregated": true + } + } + ], + "spec": { + "version": 2, + "widgetType": "table", + "encodings": { + "columns": [ + { + "fieldName": "case_id", + "displayName": "Case" + }, + { + "fieldName": "opened_at", + "displayName": "Opened" + }, + { + "fieldName": "channel", + "displayName": "Channel" + }, + { + "fieldName": "Priority Level", + "displayName": "Priority" + }, + { + "fieldName": "time_to_resolution_hours", + "displayName": "Hours to resolve", + "format": { + "type": "number", + "decimalPlaces": { + "type": "max", + "places": 1 + } + }, + "style": { + "type": "basic", + "rules": [ + { + "condition": { + "operand": { + "type": "data-value", + "value": "24" + }, + "operator": ">" + }, + "backgroundColor": { + "hex": "#FF7E5C" + } + } + ] + } + }, + { + "fieldName": "satisfaction_score", + "displayName": "CSAT" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Case Detail" + } + } + }, + "position": { + "x": 0, + "y": 17, + "width": 8, + "height": 6 + } + }, + { + "widget": { + "name": "b4dd0785", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + }, + { + "name": "channel", + "expression": "`channel`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "frame": { + "showTitle": true, + "title": "Cases by channel", + "description": "Distribution of support cases across intake channels." 
+ }, + "widgetType": "pie", + "encodings": { + "angle": { + "fieldName": "measure(Total Cases)", + "scale": { + "type": "quantitative" + }, + "displayName": "Cases" + }, + "color": { + "fieldName": "channel", + "displayName": "Channel", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "Email", + "color": "#FF7054" + }, + { + "value": "Chat", + "color": "#FFA600" + }, + { + "value": "Phone", + "color": "#DE5582" + }, + { + "value": "Web Form", + "color": "#995495" + } + ] + } + }, + "label": { + "show": true + } + } + } + }, + "position": { + "x": 8, + "y": 12, + "width": 4, + "height": 5 + } + } + ], + "pageType": "PAGE_TYPE_CANVAS", + "layoutVersion": "GRID_V1" + }, + { + "name": "filters", + "displayName": "Filters", + "layout": [ + { + "widget": { + "name": "filter-date", + "queries": [ + { + "name": "ds_date", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "opened_at", + "expression": "`opened_at`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "filter-date-range-picker", + "encodings": { + "fields": [ + { + "fieldName": "opened_at", + "queryName": "ds_date" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Date" + } + } + }, + "position": { + "x": 0, + "y": 0, + "width": 4, + "height": 2 + } + }, + { + "widget": { + "name": "filter-region", + "queries": [ + { + "name": "ds_region", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "region_name", + "expression": "`region_name`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "filter-multi-select", + "encodings": { + "fields": [ + { + "fieldName": "region_name", + "queryName": "ds_region", + "displayName": "Region" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Region" + } + } + }, + "position": { + "x": 4, + "y": 0, + "width": 4, + "height": 2 + } + }, + { + "widget": { + "name": "filter-resolution-time", + "queries": [ + { + "name": 
"ds_resolution", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "min(time_to_resolution_hours)", + "expression": "MIN(`time_to_resolution_hours`)" + }, + { + "name": "max(time_to_resolution_hours)", + "expression": "MAX(`time_to_resolution_hours`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "range-slider", + "encodings": { + "fields": [ + { + "fieldName": "time_to_resolution_hours", + "queryName": "ds_resolution" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Resolution time (hrs)" + } + } + }, + "position": { + "x": 8, + "y": 0, + "width": 4, + "height": 2 + } + } + ], + "pageType": "PAGE_TYPE_GLOBAL_FILTERS", + "layoutVersion": "GRID_V1" + } + ], + "uiSettings": { + "theme": { + "canvasBackgroundColor": { + "light": "#FCFCFC", + "dark": "#1F272D" + }, + "widgetBackgroundColor": { + "light": "#FFFFFF", + "dark": "#11171C" + }, + "fontColor": { + "light": "#11171C", + "dark": "#E8ECF0" + }, + "selectionColor": { + "light": "#2272B4", + "dark": "#8ACAFF" + }, + "visualizationColors": [ + "#FFA600", + "#FF7054", + "#DE5582", + "#995495", + "#4E5185", + "#1D425C", + "#99DDB4" + ], + "widgetHeaderAlignment": "LEFT" + } + } +} \ No newline at end of file diff --git a/databricks-skills/databricks-aibi-dashboards/.tests/generate_test_data.py b/databricks-skills/databricks-aibi-dashboards/.tests/generate_test_data.py new file mode 100644 index 00000000..5046e67a --- /dev/null +++ b/databricks-skills/databricks-aibi-dashboards/.tests/generate_test_data.py @@ -0,0 +1,258 @@ +""" +Generate the `support_cases` test table that the worked example in +`4-examples.md` queries from. + +Run this once before creating the example dashboard so the agent (or you, +manually) can render it against real data and visually inspect that the +example is state of the art. 
+ +Usage +----- + # one-time install + pip install "databricks-connect>=15.0" + + # generate the table + python databricks-skills/databricks-aibi-dashboards/.tests/generate_test_data.py + +Env vars (all optional) + TEST_CATALOG default: main + TEST_SCHEMA default: dashboard_skill_test + TEST_TABLE default: support_cases + NUM_CASES default: 5000 (rows in the generated table) + DATABRICKS_CONFIG_PROFILE databricks-connect picks this up automatically + +After the table is written, the script prints the dataset query you can paste +into a new dashboard, and the next-step CLI command to create the dashboard +from the example JSON. +""" + +from __future__ import annotations + +import os +import random +from datetime import datetime, timedelta, timezone + +from databricks.connect import DatabricksSession +from pyspark.sql import functions as F +from pyspark.sql.types import ( + BooleanType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, + TimestampType, +) + +CATALOG = os.environ.get("TEST_CATALOG", "main") +SCHEMA = os.environ.get("TEST_SCHEMA", "dashboard_skill_test") +TABLE = os.environ.get("TEST_TABLE", "support_cases") +NUM_CASES = int(os.environ.get("NUM_CASES", "5000")) + +FQN = f"{CATALOG}.{SCHEMA}.{TABLE}" + +# Inject a volume spike around a synthetic "Product release 4.1" date so the +# dashboard tells a coherent incident story. ~700 extra cases land in the +# 6 weeks following the release (the annotation in the forecast chart points +# at exactly this date). +RELEASE_DATE = datetime(2026, 2, 16, 9, 0, tzinfo=timezone.utc) # Monday +SPIKE_CASES = 700 +SPIKE_DURATION_WEEKS = 6 +# Cases in the spike skew toward Critical/High and a worse satisfaction so the +# pivot + heatmap + map widgets all surface the impact. +SPIKE_PRIORITIES = [("Critical", 0.22), ("High", 0.38), ("Medium", 0.30), ("Low", 0.10)] + +# Realistic-ish distributions so the dashboard has meaningful patterns. 
+PRIORITIES = [("Critical", 0.08), ("High", 0.22), ("Medium", 0.45), ("Low", 0.25)] +CHANNELS = [("Email", 0.45), ("Chat", 0.30), ("Phone", 0.15), ("Web Form", 0.10)] +# A few major US metros — gives the symbol-map a recognizable shape. +CITIES = [ + ("Northeast", 40.7128, -74.0060), # NYC + ("Northeast", 42.3601, -71.0589), # Boston + ("Mid-Atlantic", 38.9072, -77.0369), # DC + ("Southeast", 33.7490, -84.3880), # Atlanta + ("Southeast", 25.7617, -80.1918), # Miami + ("Midwest", 41.8781, -87.6298), # Chicago + ("Midwest", 44.9778, -93.2650), # Minneapolis + ("South Central", 29.7604, -95.3698), # Houston + ("South Central", 32.7767, -96.7970), # Dallas + ("Mountain", 39.7392, -104.9903), # Denver + ("West", 37.7749, -122.4194), # SF + ("West", 34.0522, -118.2437), # LA + ("Pacific Northwest", 47.6062, -122.3321), # Seattle +] + + +def weighted_choice(pairs: list[tuple[str, float]]) -> str: + """Pick a label from [(label, weight), ...].""" + r = random.random() + cum = 0.0 + for label, w in pairs: + cum += w + if r < cum: + return label + return pairs[-1][0] + + +def make_row(idx: int, now: datetime, in_spike: bool = False) -> tuple: + """Build one synthetic support_cases row. + + Resolution time is sampled by priority — Critical resolves fastest, + Low takes longer — so the dashboard's "Avg Resolution Hours" KPI + actually varies by category. + + If `in_spike=True`, the row is part of the post-release surge: opened + within ~6 weeks after RELEASE_DATE, biased toward Critical/High + priorities, and with a slightly worse satisfaction baseline. + """ + if in_spike: + priority = weighted_choice(SPIKE_PRIORITIES) + else: + priority = weighted_choice(PRIORITIES) + channel = weighted_choice(CHANNELS) + region, lat_base, lon_base = random.choice(CITIES) + # Jitter the lat/lon so the bubble map doesn't collapse to 13 points. 
+ customer_lat = lat_base + random.gauss(0, 0.3) + customer_lon = lon_base + random.gauss(0, 0.3) + + if in_spike: + # Cases opened in the SPIKE_DURATION_WEEKS window right after the release, + # front-loaded (peak in week 1-2, taper by week 6). + days_after_release = random.triangular(0, SPIKE_DURATION_WEEKS * 7, 7) + opened_at = RELEASE_DATE + timedelta(days=days_after_release, + hours=random.uniform(0, 24)) + else: + # Baseline: uniformly over the last 18 months. + opened_offset_days = random.uniform(0, 540) + opened_at = now - timedelta(days=opened_offset_days, + hours=random.uniform(0, 24)) + + # Resolution time depends on priority. Lognormal so the long tail looks real. + # Spike cases resolve a bit slower (team overloaded). + base_hours = {"Critical": 4, "High": 12, "Medium": 36, "Low": 72}[priority] + if in_spike: + base_hours *= 1.4 + resolution_hours = max(0.25, random.lognormvariate(0, 0.8) * base_hours / 2) + + # ~3% of cases opened in the last 14 days are still open (closed_at NULL). + still_open = random.random() < 0.03 and opened_at > now - timedelta(days=14) + closed_at = None if still_open else opened_at + timedelta(hours=resolution_hours) + + # Reopen rate ~7% overall, higher for Critical/High (the dashboard surfaces this). + # Spike cases also have a bumped reopen rate. + reopen_p = {"Critical": 0.12, "High": 0.10, "Medium": 0.05, "Low": 0.03}[priority] + if in_spike: + reopen_p *= 1.8 + reopened_flag = (closed_at is not None) and (random.random() < reopen_p) + + # Satisfaction inversely correlated with reopen + resolution time. 
+ sat_base = 8.5 - (1.5 if reopened_flag else 0) - min(resolution_hours / 96, 2.5) + if in_spike: + sat_base -= 0.7 + satisfaction_score = max(1.0, min(10.0, sat_base + random.gauss(0, 0.8))) + + customer_id = f"C{random.randint(1, 800):04d}" + case_id = f"CASE-{idx:06d}" + + return ( + case_id, + opened_at, + closed_at, + priority, + channel, + region, + customer_id, + reopened_flag, + round(satisfaction_score, 1), + round(customer_lat, 4), + round(customer_lon, 4), + ) + + +SCHEMA_STRUCT = StructType([ + StructField("case_id", StringType(), nullable=False), + StructField("opened_at", TimestampType(), nullable=False), + StructField("closed_at", TimestampType(), nullable=True), + StructField("priority", StringType(), nullable=False), + StructField("channel", StringType(), nullable=False), + StructField("region_name", StringType(), nullable=False), + StructField("customer_id", StringType(), nullable=False), + StructField("reopened_flag", BooleanType(), nullable=False), + StructField("satisfaction_score", DoubleType(), nullable=False), + StructField("customer_latitude", DoubleType(), nullable=False), + StructField("customer_longitude", DoubleType(), nullable=False), +]) + + +def main() -> None: + random.seed(42) + + print(f"Connecting to Databricks (serverless compute via databricks-connect)...") + spark = DatabricksSession.builder.serverless().getOrCreate() + + print(f"Ensuring {CATALOG}.{SCHEMA} exists...") + spark.sql(f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}") + + now = datetime.now(timezone.utc) + baseline_n = NUM_CASES - SPIKE_CASES + print(f"Generating {baseline_n} baseline rows + {SPIKE_CASES} post-release spike rows " + f"(release={RELEASE_DATE.date()}, seed=42)...") + rows = [make_row(i + 1, now, in_spike=False) for i in range(baseline_n)] + rows += [make_row(baseline_n + i + 1, now, in_spike=True) for i in range(SPIKE_CASES)] + random.shuffle(rows) + + df = spark.createDataFrame(rows, schema=SCHEMA_STRUCT) + # Add the derived column that the 
dashboard's ds_support computes inline, + # so a quick sanity-check query against the table works without the dashboard. + df = df.withColumn( + "time_to_resolution_hours", + (F.unix_timestamp("closed_at") - F.unix_timestamp("opened_at")) / 3600.0, + ) + + print(f"Writing {FQN} (overwriteSchema=true)...") + df.write.format("delta").mode("overwrite").option("overwriteSchema", "true").saveAsTable(FQN) + + count = spark.table(FQN).count() + print(f" wrote {count} rows") + + # Sanity check — make sure the dashboard's headline KPIs are non-degenerate. + print() + print("Sanity check (matches the dashboard's MEASURE() definitions):") + spark.sql(f""" + SELECT + COUNT(*) AS total_cases, + ROUND(AVG(time_to_resolution_hours), 1) AS avg_resolution_hours, + ROUND(SUM(CASE WHEN reopened_flag THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS reopen_rate_pct, + ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction + FROM {FQN} + """).show(truncate=False) + + print() + print("=" * 70) + print("Next steps") + print("=" * 70) + print(f"1. The example dashboard's SQL references `FROM support_cases` (bare name).") + print(f" When you create the dashboard, pass:") + print(f" --dataset-catalog {CATALOG} --dataset-schema {SCHEMA}") + print(f" so the bare table name resolves to {FQN}.") + print() + print(f"2. Create the dashboard from the worked example:") + print(f" databricks aibi-dashboards create-from-skill \\") + print(f" --skill databricks-aibi-dashboards \\") + print(f" --example 4-examples.md \\") + print(f" --dataset-catalog {CATALOG} \\") + print(f" --dataset-schema {SCHEMA}") + print(f" (Adjust the command to whatever your dashboard-creation tool exposes;") + print(f" the key is that the dataset bind to {FQN}.)") + print() + print(f"3. 
Open the dashboard in the workspace and visually verify:") + print(f" - All 4 KPI counters show numbers (Total Cases / Avg Hours / Reopen / Satisfaction)") + print(f" - Sparkline on kpi-volume-trend renders a smooth line") + print(f" - Forecast-line connects smoothly (the bridge CTE working)") + print(f" - Symbol map shows points across the US, not all clustered at one spot") + print(f" - Pivot conditional cells show the alert-coral on high-count cells") + print(f" - Theme is warm-sunset (amber/coral/pink/purple/navy), light bg, no borders") + + +if __name__ == "__main__": + main() diff --git a/databricks-skills/databricks-aibi-dashboards/1-widget-specifications.md b/databricks-skills/databricks-aibi-dashboards/1-widget-specifications.md index 31c8bc99..e36a9606 100644 --- a/databricks-skills/databricks-aibi-dashboards/1-widget-specifications.md +++ b/databricks-skills/databricks-aibi-dashboards/1-widget-specifications.md @@ -7,6 +7,7 @@ Core widget types for AI/BI dashboards. For advanced visualizations (area, scatt - `widget.name`: alphanumeric + hyphens + underscores ONLY (max 60 characters) - `frame.title`: human-readable title (any characters allowed) - `frame.showTitle`: always set to `true` so users understand the widget +- `frame.description` + `frame.showDescription: true`: optional subtext under the title (e.g., `"All-time; 0% before the 2025-06 launch"`) — useful for giving a KPI number context without cluttering the chart itself - `displayName`: use in encodings to label axes/values clearly (e.g., "Revenue ($)", "Growth Rate (%)") - `widget.queries[].name`: use `"main_query"` for chart/counter/table widgets. Filter widgets with multiple queries can use descriptive names (see [3-filters.md](3-filters.md)) @@ -22,6 +23,7 @@ Core widget types for AI/BI dashboards. 
For advanced visualizations (area, scatt | bar | 3 | this file | | line | 3 | this file | | pie | 3 | this file | +| symbol-map | 2 | this file | | area | 3 | [2-advanced-widget-specifications.md](2-advanced-widget-specifications.md) | | scatter | 3 | [2-advanced-widget-specifications.md](2-advanced-widget-specifications.md) | | combo | 1 | [2-advanced-widget-specifications.md](2-advanced-widget-specifications.md) | @@ -181,14 +183,19 @@ Match the sparkline grain to whatever the surrounding charts use — consistent ### Value formatting -Format types: `number`, `number-currency`, `number-percent`. +Format types: `number`, `number-plain`, `number-currency`, `number-percent`. | Field type | Format | Why | |---|---|---| -| Money | `number-currency` + `currencyCode` + `abbreviation: "compact"` | "$1.2M" is readable, "1287394.55" isn't | +| Money | `number-currency` + `currencyCode: "USD"` (or `EUR` etc.) + `abbreviation: "compact"` | "$1.2M" is readable, "1287394.55" isn't | | Percentage | `number-percent` (data must be 0-1) | Renders "12.5%" from 0.125 | | Large count | `number` + `abbreviation: "compact"` | Renders "1.5K" / "2.3M" | | Small count (under ~1K) | `number` (no abbreviation) or omit `format` | Raw integer is fine | +| Value with custom unit (e.g., "8 hrs", "2 weeks") | `number-plain` + `formatTemplate: "{{ @formatted }} hrs"` | Append a unit cleanly without baking it into the dataset | + +Optional `format.suffix` (e.g., `"suffix": "h"`) appends a short unit directly after the number without a template — simpler than `formatTemplate` when you just need a single-char unit. + +> **Counters backed by `MEASURE()`**: omit `format` when `format.type` is plain `"number"` — the combination triggers an "automatically fixed" warning on the rendered widget. Use `number-plain`, `number-currency`, `number-percent`, or no format at all. ```json "value": { @@ -287,9 +294,9 @@ Each column object supports format, conditional styling, links, and tooltips. 
Co "type": "basic", "rules": [ {"condition": {"operand": {"type": "data-value", "value": "10000"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 4}}, + "backgroundColor": {"themeColorType": "visualizationColors", "position": 0}}, {"condition": {"operand": {"type": "data-value", "value": "5000"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 3}} + "backgroundColor": {"themeColorType": "visualizationColors", "position": 6}} ] }, @@ -382,16 +389,16 @@ Default behaviour: theme colors are assigned to categories in order. To pin spec "scale": { "type": "categorical", "mappings": [ - {"value": "1-Critical", "color": {"themeColorType": "visualizationColors", "position": 6}}, - {"value": "4-Low", "color": {"themeColorType": "visualizationColors", "position": 1}} + {"value": "1-Critical", "color": {"hex": "#FF7E5C"}}, + {"value": "4-Low", "color": {"themeColorType": "visualizationColors", "position": 6}} ] } } ``` -`themeColorType: "visualizationColors"` + `position: 1..N` selects from the dashboard's theme palette. For an exact hex, use `{"hex": "#FF0000"}` instead of `themeColorType`. +`themeColorType: "visualizationColors"` + `position: 0..N-1` (0-indexed) selects from the dashboard's theme palette. For semantic colors that must hold across palette changes (Critical → always red, OK → always green), pin a **literal hex** with `{"hex": "#FF0000"}` instead — palette reshuffles silently move palette-position colors. -> For continuous color ramps on quantitative encodings (e.g., choropleth, symbol-map, heatmap), use `colorRamp` — see [2-advanced-widget-specifications.md](2-advanced-widget-specifications.md). +> For continuous color ramps on quantitative encodings, use `colorRamp` — see Symbol Map below, or [Heatmap](2-advanced-widget-specifications.md#heatmap) and [Choropleth Map](2-advanced-widget-specifications.md#choropleth-map) in advanced specs. 
### Annotations (event markers) @@ -433,13 +440,55 @@ Multiple annotations are allowed — add more objects to the array. For non-date "widgetType": "pie", "encodings": { "angle": {"fieldName": "revenue", "scale": {"type": "quantitative"}}, - "color": {"fieldName": "category", "scale": {"type": "categorical"}} + "color": {"fieldName": "category", "scale": {"type": "categorical"}}, + "label": {"show": true} } } ``` --- +## Symbol Map (bubble map) + +Lat/lon scatter plot on a map. Use for **point data** (customer locations, sensor readings); use `choropleth-map` for **regions** (countries, states) colored by aggregate. + +> **Strongly preferred whenever the data has a geographic dimension.** A bubble map is one of the highest-signal visuals in a dashboard — "where is the action" reads at a glance and grabs attention better than a bar chart of the same data. If the dataset has lat/lon (or a country/state column → `choropleth-map`), include a map widget. + +- `version`: **2** +- `widgetType`: "symbol-map" +- Dataset must include latitude and longitude columns (or a `GEOMETRY`/`GEOGRAPHY` column). + +```json +"spec": { + "version": 2, + "widgetType": "symbol-map", + "encodings": { + "coordinates": { + "latitude": {"fieldName": "customer_latitude"}, + "longitude": {"fieldName": "customer_longitude"} + }, + "color": { + "fieldName": "sum(satisfaction_score)", + "scale": {"type": "quantitative", + "colorRamp": {"mode": "custom-sequential", "colors": {"start": "#FF7E5C", "end": "#99DDB4"}}}, + "legend": {"hide": true} + }, + "size": {"fieldName": "count(*)", "scale": {"type": "quantitative"}} + }, + "mark": {"opacity": 0.7}, + "frame": {"showTitle": true, "title": "Customer Locations"} +} +``` + +**`colorRamp` modes:** + +- `{"mode": "custom-sequential", "colors": {"start": "#FF7E5C", "end": "#99DDB4"}}` — your own gradient between two hex stops. **Prefer this for themed dashboards** so the map ties into the palette; if directional, `start` = bad color, `end` = good color. 
+- `{"mode": "scheme", "scheme": "<name>"}` — prebuilt ramps. Known names: `magma`, `viridis`, `plasma`, `inferno`, `YlGnBu`, `RdYlBu`, `blues`, `redyellowgreen`. Avoid `redyellowgreen` — clashes with most modern themes. + +For categorical color (e.g., colored by region), use `scale.type: "categorical"` with the same `mappings` syntax as bar charts. `mark.opacity` (0–1) controls point transparency — useful when many points cluster. + +--- + ## Axis Formatting Add `format` to any encoding to display values appropriately: diff --git a/databricks-skills/databricks-aibi-dashboards/2-advanced-widget-specifications.md b/databricks-skills/databricks-aibi-dashboards/2-advanced-widget-specifications.md index a4bd57ea..69cf066b 100644 --- a/databricks-skills/databricks-aibi-dashboards/2-advanced-widget-specifications.md +++ b/databricks-skills/databricks-aibi-dashboards/2-advanced-widget-specifications.md @@ -176,7 +176,7 @@ Overlays a model prediction on top of historical data — historical line contin > **Always exclude the current (in-progress) bucket from the historical series.** If you aggregate weekly and today is Tuesday, the current week's bucket is only 2 days of data — the line drops off a cliff right before the forecast starts. Filter with `WHERE bucket_start < DATE_TRUNC('<grain>', current_date())` using the **same grain as the aggregation**. ```sql -WITH original AS ( +WITH actuals AS ( SELECT DATE_TRUNC('WEEK', opened_at) AS opened_at, COUNT(*) AS count FROM support_cases -- Drop the partial-elapsed bucket. 
Grain MUST match the DATE_TRUNC above — @@ -185,24 +185,43 @@ WITH original AS ( GROUP BY 1 ), dates AS ( - SELECT MAX(opened_at) AS max_d, MIN(opened_at) AS min_d FROM original + SELECT MAX(opened_at) AS max_d, MIN(opened_at) AS min_d FROM actuals ), forecast AS ( - SELECT opened_at, count_forecast, count_upper, count_lower, NULL AS count + SELECT opened_at, count_forecast, count_upper, count_lower, CAST(NULL AS BIGINT) AS count FROM AI_FORECAST( - TABLE(original), + TABLE(actuals), horizon => (SELECT max_d + MAKE_DT_INTERVAL( CAST(FLOOR(DATEDIFF(max_d, min_d) * 0.5) AS INT), 0, 0, 0) FROM dates), time_col => 'opened_at', value_col => 'count' ) +), +bridge AS ( + -- One-row "seam" that carries the last actual value into the forecast columns + -- so the historical line and the forecast band visually connect instead of breaking + -- with a gap at the boundary. + SELECT a.opened_at, + a.count AS count_forecast, + a.count AS count_upper, + a.count AS count_lower, + a.count + FROM actuals a + JOIN dates d ON a.opened_at = d.max_d ) -SELECT * FROM forecast -UNION ALL -SELECT opened_at, NULL, NULL, NULL, count FROM original +SELECT opened_at, CAST(NULL AS BIGINT) AS count_forecast, CAST(NULL AS BIGINT) AS count_upper, CAST(NULL AS BIGINT) AS count_lower, count FROM actuals +UNION ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM bridge +UNION ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM forecast ``` -The `horizon` expression above projects forward 50% of the historical range. Tune the multiplier (0.5 → 1.0 for "predict as far as we've seen") to taste. +Three CTEs contribute rows to the final `UNION ALL` (the fourth, `dates`, is only a helper that supplies the min/max timestamps): +- **`actuals`** — historical series (`count` populated, forecast columns NULL). +- **`forecast`** — `AI_FORECAST` output (forecast columns populated, `count` NULL). +- **`bridge`** — a **single row at the last actual timestamp** with the actual value duplicated into all three forecast columns. 
Without it, the historical line and the forecast band have a visible gap at the boundary; with it, they connect smoothly. + +> **The final `SELECT`s must list columns explicitly, in the same order, in every branch.** `SELECT * FROM actuals` (2 cols) `UNION ALL SELECT * FROM forecast` (5 cols) errors out with `NUM_COLUMNS_MISMATCH`. Project the same 5-column shape from every CTE — fill NULLs where a branch doesn't have a value (and `CAST(NULL AS <type>)` so the types align). + +The `horizon` expression projects forward 50% of the historical range. Tune the multiplier (0.5 → 1.0 for "predict as far as we've seen") to taste. **If you switch the aggregation grain, update both `DATE_TRUNC` calls.** They must match — a daily x-axis with a weekly cutoff filter would still show the cliff. Common pairings: @@ -269,11 +288,11 @@ A cross-tab — dimensions on rows AND columns, measures in cells. Supports per- "type": "basic", "rules": [ {"condition": {"operand": {"type": "data-value", "value": "30"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 4}}, + "backgroundColor": {"hex": "#FF7E5C"}}, {"condition": {"operand": {"type": "data-value", "value": "20"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 3}}, + "backgroundColor": {"themeColorType": "visualizationColors", "position": 0}}, {"condition": {"operand": {"type": "data-value", "value": "15"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 1}} + "backgroundColor": {"themeColorType": "visualizationColors", "position": 6}} ] } } @@ -361,40 +380,6 @@ Add more `stages` entries for multi-step flows (e.g., funnel-with-attribution: ` --- -## Symbol Map (point map) - -Lat/lon scatter plot on a map. Use this for **point data** (customer locations, sensor readings). Use `choropleth-map` for **regions** (countries, states) colored by aggregate. 
- -- `version`: **2** -- `widgetType`: "symbol-map" -- Dataset must include latitude and longitude columns (or a `GEOMETRY`/`GEOGRAPHY` column). - -```json -"spec": { - "version": 2, - "widgetType": "symbol-map", - "encodings": { - "coordinates": { - "latitude": {"fieldName": "customer_latitude"}, - "longitude": {"fieldName": "customer_longitude"} - }, - "color": { - "fieldName": "sum(satisfaction_score)", - "scale": {"type": "quantitative", - "colorRamp": {"mode": "scheme", "scheme": "magma"}}, - "legend": {"hide": true} - }, - "size": {"fieldName": "count(*)", "scale": {"type": "quantitative"}} - }, - "mark": {"opacity": 0.7}, - "frame": {"showTitle": true, "title": "Customer Locations"} -} -``` - -Color ramp schemes available: `magma`, `viridis`, `plasma`, `inferno`, `YlGnBu`, `RdYlBu`, plus theme-aware presets. For categorical color (e.g., colored by region instead of by intensity), use `scale.type: "categorical"` and the same `mappings` syntax as bar charts. - ---- - ## Heatmap Color-intensity grid: x-axis categorical, y-axis categorical, color = numeric aggregate. Useful for "X by Y" matrices. @@ -407,8 +392,8 @@ Color-intensity grid: x-axis categorical, y-axis categorical, color = numeric ag "version": 3, "widgetType": "heatmap", "encodings": { - "x": {"fieldName": "priority", "scale": {"type": "categorical"}}, - "y": {"fieldName": "ship_mode", "scale": {"type": "categorical"}}, + "x": {"fieldName": "priority", "scale": {"type": "categorical"}, "axis": {"hideTitle": true}}, + "y": {"fieldName": "ship_mode", "scale": {"type": "categorical"}, "axis": {"hideTitle": true}}, "color": {"fieldName": "sum(order_count)", "scale": {"type": "quantitative", "colorRamp": {"mode": "scheme", "scheme": "viridis"}}} @@ -419,6 +404,8 @@ Color-intensity grid: x-axis categorical, y-axis categorical, color = numeric ag Heatmap limit: 64K rows / 10MB. For larger data, pre-aggregate to a smaller grid. 
+`axis.hideTitle: true` (shown above) drops the redundant "priority" / "ship_mode" axis labels — the row/column headers already tell you what they are. Same trick works on any x/y axis encoding (line, bar, heatmap, pivot) when the column name is obvious from context. + --- ## Funnel diff --git a/databricks-skills/databricks-aibi-dashboards/4-examples.md b/databricks-skills/databricks-aibi-dashboards/4-examples.md index 99eb767d..83c51682 100644 --- a/databricks-skills/databricks-aibi-dashboards/4-examples.md +++ b/databricks-skills/databricks-aibi-dashboards/4-examples.md @@ -30,13 +30,15 @@ See [SKILL.md](SKILL.md#widget-index-version--where-documented) for the full ver ### Layout (12-col grid) ``` -y=0: Header (w=12, h=2) -y=2: KPI (w=3) | KPI w/ sparkline (w=3) | KPI (w=3) | KPI (w=3) ← fills 12 -y=5: Forecast (w=8, h=4) | Pivot summary (w=4, h=4) -y=9: Symbol map (w=8, h=5) | Histogram (w=4, h=5) -y=14: Detail table (w=12, h=6) +y=0: Header (w=12, h=3) ← story prose tying the dashboard together +y=3: KPI (w=3) | KPI w/ sparkline (w=3) | KPI (w=3) | KPI (w=3) ← fills 12 +y=6: Forecast w/ release annotation (w=8, h=6) | Histogram (w=4, h=6) +y=12: Symbol map (w=8, h=5) | Pie by channel (w=4, h=5) +y=17: Detail table (w=8, h=6) | Heatmap (w=4, h=6) ``` +This example's header carries a short narrative tying the widgets together, and the forecast widget uses a `vertical-line` annotation to mark a notable date. That's one way to structure a story — useful if there's a real inflection point in the data — but it's not required: a dashboard can also just present the metrics neutrally, or anchor the story on a different widget. Treat it as illustrative. 
+ --- ## Full Dashboard: Support Operations @@ -48,443 +50,939 @@ y=14: Detail table (w=12, h=6) "name": "ds_support", "displayName": "Support cases", "queryLines": [ - "SELECT case_id, opened_at, closed_at, priority, channel, region_name,", - " customer_id, reopened_flag, satisfaction_score,", - " customer_latitude, customer_longitude,", - " (unix_timestamp(closed_at) - unix_timestamp(opened_at)) / 3600.0 AS time_to_resolution_hours", + "SELECT case_id, opened_at, closed_at, priority, channel, region_name,\n", + " customer_id, reopened_flag, satisfaction_score,\n", + " customer_latitude, customer_longitude,\n", + " (unix_timestamp(closed_at) - unix_timestamp(opened_at)) / 3600.0 AS time_to_resolution_hours\n", "FROM support_cases" ], "columns": [ - {"displayName": "Total Cases", "description": "Count of support cases", - "expression": "COUNT(`case_id`)"}, - {"displayName": "Avg Resolution Hours", - "description": "Mean resolution time across closed cases", - "expression": "AVG(`time_to_resolution_hours`)"}, - {"displayName": "Reopen Rate %", - "description": "Percent of cases reopened after closure", - "expression": "SUM(CASE WHEN `reopened_flag`=true THEN 1 ELSE 0 END) * 100.0 / COUNT(`case_id`)"}, - {"displayName": "Avg Satisfaction", - "description": "Average customer satisfaction (1-10)", - "expression": "AVG(`satisfaction_score`)"}, - {"displayName": "Priority Level", - "description": "Sortable priority label", - "expression": "CASE WHEN `priority`='Critical' THEN '1-Critical' WHEN `priority`='High' THEN '2-High' WHEN `priority`='Medium' THEN '3-Medium' ELSE '4-Low' END"} + { + "displayName": "Total Cases", + "description": "Count of support cases", + "expression": "COUNT(`case_id`)" + }, + { + "displayName": "Avg Resolution Hours", + "description": "Mean resolution time across closed cases", + "expression": "AVG(`time_to_resolution_hours`)" + }, + { + "displayName": "Reopen Rate %", + "description": "Percent of cases reopened after closure", + "expression": 
"SUM(CASE WHEN `reopened_flag`=true THEN 1 ELSE 0 END) * 1.0 / COUNT(`case_id`)" + }, + { + "displayName": "Avg Satisfaction", + "description": "Average customer satisfaction (1-10)", + "expression": "AVG(`satisfaction_score`)" + }, + { + "displayName": "Priority Level", + "description": "Sortable priority label", + "expression": "CASE WHEN `priority`='Critical' THEN '1-Critical' WHEN `priority`='High' THEN '2-High' WHEN `priority`='Medium' THEN '3-Medium' ELSE '4-Low' END" + } ] }, { "name": "ds_forecast", "displayName": "Cases forecast", "queryLines": [ - "WITH original AS (\n", - " -- Cutoff grain MUST match the DATE_TRUNC grain to drop the partial week\n", + "WITH actuals AS (\n", " SELECT DATE_TRUNC('WEEK', opened_at) AS opened_at, COUNT(*) AS count\n", " FROM support_cases\n", " WHERE DATE_TRUNC('WEEK', opened_at) < DATE_TRUNC('WEEK', current_date())\n", " GROUP BY 1\n", "),\n", - "dates AS (SELECT MAX(opened_at) AS max_d, MIN(opened_at) AS min_d FROM original),\n", + "dates AS (SELECT MAX(opened_at) AS max_d, MIN(opened_at) AS min_d FROM actuals),\n", "forecast AS (\n", - " SELECT opened_at, count_forecast, count_upper, count_lower, NULL AS count\n", - " FROM AI_FORECAST(TABLE(original),\n", + " SELECT opened_at, count_forecast, count_upper, count_lower, CAST(NULL AS BIGINT) AS count\n", + " FROM AI_FORECAST(TABLE(actuals),\n", " horizon => (SELECT max_d + MAKE_DT_INTERVAL(CAST(FLOOR(DATEDIFF(max_d, min_d) * 0.5) AS INT), 0, 0, 0) FROM dates),\n", " time_col => 'opened_at', value_col => 'count')\n", + "),\n", + "bridge AS (\n", + " SELECT a.opened_at, a.count AS count_forecast, a.count AS count_upper, a.count AS count_lower, a.count\n", + " FROM actuals a JOIN dates d ON a.opened_at = d.max_d\n", ")\n", - "SELECT * FROM forecast UNION ALL SELECT opened_at, NULL, NULL, NULL, count FROM original" + "SELECT opened_at, CAST(NULL AS BIGINT) AS count_forecast, CAST(NULL AS BIGINT) AS count_upper, CAST(NULL AS BIGINT) AS count_lower, count FROM actuals\n", + "UNION 
ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM bridge\n", + "UNION ALL SELECT opened_at, count_forecast, count_upper, count_lower, count FROM forecast" ] } ], - "pages": [ { "name": "overview", "displayName": "Overview", - "pageType": "PAGE_TYPE_CANVAS", - "layoutVersion": "GRID_V1", "layout": [ - { "widget": { "name": "header", - "multilineTextboxSpec": {"lines": ["# Support Operations\n\nWeekly volume, resolution speed, and forecast."]} + "multilineTextboxSpec": { + "lines": [ + "# Support Operations \u2014 Post-Release Surge (4.1)\n\n**The story this week:** a clear volume spike in mid-February \u2014 the date the new Product 4.1 release went out (marked on the forecast chart). The release introduced a regression that drove a wave of Critical/High cases over the following 6 weeks: case volume jumps, average resolution time creeps up, reopen rate climbs, and customer satisfaction dips on the affected metros \u2014 visible on the satisfaction map as warmer (lower) scores. The forecast extends the trend forward so the team can size the cleanup ahead. Use the filters page to slice by region or resolution-time bucket to localize the impact." 
+ ] + } }, - "position": {"x": 0, "y": 0, "width": 12, "height": 2} + "position": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } }, - { "widget": { "name": "kpi-total-cases", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [{"name": "measure(Total Cases)", "expression": "MEASURE(`Total Cases`)"}], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "counter", + "version": 2, + "widgetType": "counter", "encodings": { - "value": {"fieldName": "measure(Total Cases)", "displayName": "Total Cases"} + "value": { + "fieldName": "measure(Total Cases)", + "displayName": "Total Cases" + } }, - "frame": {"title": "Total Cases", "showTitle": true} + "frame": { + "title": "Total Cases", + "showTitle": true + } } }, - "position": {"x": 0, "y": 2, "width": 3, "height": 3} + "position": { + "x": 0, + "y": 3, + "width": 3, + "height": 3 + } }, - { "widget": { "name": "kpi-volume-trend", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "weekly(opened_at)", "expression": "DATE_TRUNC(\"WEEK\", `opened_at`)"}, - {"name": "measure(Total Cases)", "expression": "MEASURE(`Total Cases`)"} - ], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "weekly(opened_at)", + "expression": "DATE_TRUNC(\"WEEK\", `opened_at`)" + }, + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + } + ], + "disaggregated": false, + "orders": [ + { + "direction": "DESC", + "expression": "DATE_TRUNC(\"WEEK\", `opened_at`)" + } + ] + } } - }], + ], "spec": { - "version": 2, "widgetType": "counter", - "encodings": { - "value": {"fieldName": 
"measure(Total Cases)", "displayName": "This Week"}, - "period": {"fieldName": "weekly(opened_at)"} + "version": 2, + "frame": { + "title": "Daily Case Volume ", + "showTitle": true }, - "frame": {"title": "Volume (with trend)", "showTitle": true} + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Total Cases)", + "displayName": "This Week" + }, + "period": { + "fieldName": "weekly(opened_at)" + } + } } }, - "position": {"x": 3, "y": 2, "width": 3, "height": 3} + "position": { + "x": 3, + "y": 3, + "width": 3, + "height": 3 + } }, - { "widget": { "name": "kpi-resolution", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [{"name": "measure(Avg Resolution Hours)", "expression": "MEASURE(`Avg Resolution Hours`)"}], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Avg Resolution Hours)", + "expression": "MEASURE(`Avg Resolution Hours`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "counter", - "encodings": { - "value": {"fieldName": "measure(Avg Resolution Hours)", "displayName": "Avg Hours"} + "version": 2, + "frame": { + "title": "Avg Resolution Time", + "showTitle": true }, - "frame": {"title": "Avg Resolution Time", "showTitle": true} + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Avg Resolution Hours)", + "formatTemplate": "{{ @formatted }} hrs", + "displayName": "Avg Hours" + } + } } }, - "position": {"x": 6, "y": 2, "width": 3, "height": 3} + "position": { + "x": 6, + "y": 3, + "width": 3, + "height": 3 + } }, - { "widget": { "name": "kpi-reopen", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [{"name": "measure(Reopen Rate %)", "expression": "MEASURE(`Reopen Rate %`)"}], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + 
"datasetName": "ds_support", + "fields": [ + { + "name": "measure(Reopen Rate %)", + "expression": "MEASURE(`Reopen Rate %`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "counter", - "encodings": { - "value": {"fieldName": "measure(Reopen Rate %)", "displayName": "Reopen Rate"} + "version": 2, + "frame": { + "title": "Reopen Rate (%)", + "showTitle": true }, - "frame": {"title": "Reopen Rate (%)", "showTitle": true} + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "measure(Reopen Rate %)", + "format": { + "type": "number-percent", + "decimalPlaces": { + "type": "max", + "places": 2 + } + }, + "displayName": "Reopen Rate" + } + } } }, - "position": {"x": 9, "y": 2, "width": 3, "height": 3} + "position": { + "x": 9, + "y": 3, + "width": 3, + "height": 3 + } }, - { "widget": { "name": "case-forecast", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_forecast", - "fields": [ - {"name": "opened_at", "expression": "`opened_at`"}, - {"name": "count", "expression": "`count`"}, - {"name": "count_forecast", "expression": "`count_forecast`"}, - {"name": "count_upper", "expression": "`count_upper`"}, - {"name": "count_lower", "expression": "`count_lower`"} - ], - "disaggregated": true + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_forecast", + "fields": [ + { + "name": "opened_at", + "expression": "`opened_at`" + }, + { + "name": "count", + "expression": "`count`" + }, + { + "name": "count_forecast", + "expression": "`count_forecast`" + }, + { + "name": "count_upper", + "expression": "`count_upper`" + }, + { + "name": "count_lower", + "expression": "`count_lower`" + } + ], + "disaggregated": true + } } - }], + ], "spec": { - "version": 1, "widgetType": "forecast-line", + "version": 1, + "widgetType": "forecast-line", "encodings": { - "x": {"fieldName": "opened_at", "scale": {"type": "temporal"}}, + "x": { + "fieldName": "opened_at", + "scale": { + 
"type": "temporal" + } + }, "y": { - "scale": {"type": "quantitative"}, - "original": {"fieldName": "count", "displayName": "Cases"}, - "prediction": {"fieldName": "count_forecast", "displayName": "Forecast"}, - "predictionUpper": {"fieldName": "count_upper"}, - "predictionLower": {"fieldName": "count_lower"} + "scale": { + "type": "quantitative", + "domainMin": 0 + }, + "original": { + "fieldName": "count", + "displayName": "Cases" + }, + "prediction": { + "fieldName": "count_forecast", + "displayName": "Forecast" + }, + "predictionUpper": { + "fieldName": "count_upper" + }, + "predictionLower": { + "fieldName": "count_lower" + } } }, "annotations": [ { "type": "vertical-line", "encodings": { - "x": {"dataValue": "2024-11-28T12:00:00.000", "dataType": "DATETIME"}, - "label": {"value": "Thanksgiving"}, - "color": {"value": {"themeColorType": "visualizationColors", "position": 3}} + "x": { + "dataValue": "2026-02-16T09:00:00.000", + "dataType": "DATETIME" + }, + "label": { + "value": "Product release 4.1" + }, + "color": { + "value": { + "hex": "#FF7E5C" + } + } } } ], - "frame": {"showTitle": true, "title": "Case Volume — actuals + forecast"} + "frame": { + "showTitle": true, + "title": "Case Volume \u2014 actuals + forecast" + } } }, - "position": {"x": 0, "y": 5, "width": 8, "height": 4} + "position": { + "x": 0, + "y": 6, + "width": 8, + "height": 6 + } }, - { "widget": { "name": "priority-by-channel", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "channel", "expression": "`channel`"}, - {"name": "Priority Level", "expression": "`Priority Level`"}, - {"name": "count(case_id)", "expression": "COUNT(`case_id`)"} - ], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "count(case_id)", + "expression": "COUNT(`case_id`)" + }, + { + "name": "Priority Level", + "expression": "`Priority Level`" + }, + { + "name": 
"channel", + "expression": "`channel`" + } + ], + "disaggregated": false + } } - }], + ], "spec": { "version": 3, - "widgetType": "pivot", + "frame": { + "showTitle": true, + "title": "Cases by channel \u00d7 priority" + }, + "widgetType": "heatmap", "encodings": { - "rows": [{"fieldName": "channel"}], - "columns": [{"fieldName": "Priority Level"}], - "cell": { - "type": "multi-cell", - "fields": [{ - "fieldName": "count(case_id)", "cellType": "text", - "style": { - "type": "basic", - "rules": [ - {"condition": {"operand": {"type": "data-value", "value": "30"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 6}}, - {"condition": {"operand": {"type": "data-value", "value": "10"}, "operator": ">="}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 3}} - ] + "x": { + "fieldName": "Priority Level", + "scale": { + "type": "categorical" + } + }, + "y": { + "fieldName": "channel", + "scale": { + "type": "categorical" + } + }, + "color": { + "fieldName": "count(case_id)", + "scale": { + "type": "quantitative", + "colorRamp": { + "mode": "custom-sequential", + "colors": { + "start": "#FFA600", + "end": "#995495" + } } - }] + } + }, + "label": { + "show": true } - }, - "frame": {"showTitle": true, "title": "Cases by channel × priority"} + } } }, - "position": {"x": 8, "y": 5, "width": 4, "height": 4} + "position": { + "x": 8, + "y": 17, + "width": 4, + "height": 6 + } }, - { "widget": { "name": "customer-map", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "customer_latitude", "expression": "`customer_latitude`"}, - {"name": "customer_longitude", "expression": "`customer_longitude`"}, - {"name": "measure(Avg Satisfaction)", "expression": "MEASURE(`Avg Satisfaction`)"}, - {"name": "count(*)", "expression": "COUNT(`*`)"} - ], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + 
"fields": [ + { + "name": "measure(Avg Satisfaction)", + "expression": "MEASURE(`Avg Satisfaction`)" + }, + { + "name": "customer_latitude", + "expression": "`customer_latitude`" + }, + { + "name": "customer_longitude", + "expression": "`customer_longitude`" + }, + { + "name": "count(*)", + "expression": "COUNT(`*`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "symbol-map", + "version": 2, + "frame": { + "showTitle": true, + "title": "Customer Satisfaction Map" + }, + "mark": { + "opacity": 0.7 + }, + "widgetType": "symbol-map", "encodings": { "coordinates": { - "latitude": {"fieldName": "customer_latitude"}, - "longitude": {"fieldName": "customer_longitude"} + "latitude": { + "fieldName": "customer_latitude" + }, + "longitude": { + "fieldName": "customer_longitude" + } }, "color": { "fieldName": "measure(Avg Satisfaction)", - "scale": {"type": "quantitative", "colorRamp": {"mode": "scheme", "scheme": "RdYlBu"}} + "scale": { + "type": "quantitative", + "colorRamp": { + "mode": "custom-sequential", + "colors": { + "start": "#FFDC00", + "end": "#995495" + } + } + } }, - "size": {"fieldName": "count(*)", "scale": {"type": "quantitative"}} - }, - "mark": {"opacity": 0.7}, - "frame": {"showTitle": true, "title": "Customer Satisfaction Map"} + "size": { + "fieldName": "count(*)", + "scale": { + "type": "quantitative" + } + } + } } }, - "position": {"x": 0, "y": 9, "width": 8, "height": 5} + "position": { + "x": 0, + "y": 12, + "width": 8, + "height": 5 + } }, - { "widget": { "name": "resolution-distribution", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "bin(time_to_resolution_hours, binWidth=2)", - "expression": "BIN_FLOOR(`time_to_resolution_hours`, 2)"}, - {"name": "count(*)", "expression": "COUNT(`*`)"} - ], - "disaggregated": false + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "channel", + 
"expression": "`channel`" + }, + { + "name": "bin(time_to_resolution_hours, binWidth=2)", + "expression": "BIN_FLOOR(`time_to_resolution_hours`, 2)" + }, + { + "name": "count(*)", + "expression": "COUNT(`*`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 3, "widgetType": "histogram", - "encodings": { - "x": {"fieldName": "bin(time_to_resolution_hours, binWidth=2)", - "scale": {"type": "quantitative"}}, - "y": {"fieldName": "count(*)", "scale": {"type": "quantitative"}} + "version": 3, + "frame": { + "showTitle": true, + "title": "Resolution time (hours)" }, - "frame": {"showTitle": true, "title": "Resolution time (hours)"} + "widgetType": "histogram", + "encodings": { + "x": { + "fieldName": "bin(time_to_resolution_hours, binWidth=2)", + "scale": { + "type": "quantitative", + "domain": { + "max": 175 + } + } + }, + "y": { + "fieldName": "count(*)", + "scale": { + "type": "quantitative" + } + }, + "color": { + "fieldName": "channel", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "Email", + "color": "#FF7054" + } + ] + } + } + } } }, - "position": {"x": 8, "y": 9, "width": 4, "height": 5} + "position": { + "x": 8, + "y": 6, + "width": 4, + "height": 6 + } }, - { "widget": { "name": "case-detail", - "queries": [{ - "name": "main_query", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "case_id", "expression": "`case_id`"}, - {"name": "opened_at", "expression": "`opened_at`"}, - {"name": "channel", "expression": "`channel`"}, - {"name": "Priority Level", "expression": "`Priority Level`"}, - {"name": "time_to_resolution_hours", "expression": "`time_to_resolution_hours`"}, - {"name": "satisfaction_score", "expression": "`satisfaction_score`"} - ], - "disaggregated": true + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "case_id", + "expression": "`case_id`" + }, + { + "name": "opened_at", + "expression": "`opened_at`" + }, + { + "name": 
"channel", + "expression": "`channel`" + }, + { + "name": "Priority Level", + "expression": "`Priority Level`" + }, + { + "name": "time_to_resolution_hours", + "expression": "`time_to_resolution_hours`" + }, + { + "name": "satisfaction_score", + "expression": "`satisfaction_score`" + } + ], + "disaggregated": true + } } - }], + ], "spec": { - "version": 2, "widgetType": "table", + "version": 2, + "widgetType": "table", "encodings": { "columns": [ - {"fieldName": "case_id", "displayName": "Case"}, - {"fieldName": "opened_at", "displayName": "Opened"}, - {"fieldName": "channel", "displayName": "Channel"}, - {"fieldName": "Priority Level", "displayName": "Priority"}, - {"fieldName": "time_to_resolution_hours", "displayName": "Hours to resolve", - "format": {"type": "number", "decimalPlaces": {"type": "max", "places": 1}}, - "style": { - "type": "basic", - "rules": [ - {"condition": {"operand": {"type": "data-value", "value": "24"}, "operator": ">"}, - "backgroundColor": {"themeColorType": "visualizationColors", "position": 6}} - ] - }}, - {"fieldName": "satisfaction_score", "displayName": "CSAT"} + { + "fieldName": "case_id", + "displayName": "Case" + }, + { + "fieldName": "opened_at", + "displayName": "Opened" + }, + { + "fieldName": "channel", + "displayName": "Channel" + }, + { + "fieldName": "Priority Level", + "displayName": "Priority" + }, + { + "fieldName": "time_to_resolution_hours", + "displayName": "Hours to resolve", + "format": { + "type": "number", + "decimalPlaces": { + "type": "max", + "places": 1 + } + }, + "style": { + "type": "basic", + "rules": [ + { + "condition": { + "operand": { + "type": "data-value", + "value": "24" + }, + "operator": ">" + }, + "backgroundColor": { + "hex": "#FF7E5C" + } + } + ] + } + }, + { + "fieldName": "satisfaction_score", + "displayName": "CSAT" + } ] }, - "frame": {"showTitle": true, "title": "Case Detail"} + "frame": { + "showTitle": true, + "title": "Case Detail" + } } }, - "position": {"x": 0, "y": 14, "width": 12, 
"height": 6} + "position": { + "x": 0, + "y": 17, + "width": 8, + "height": 6 + } + }, + { + "widget": { + "name": "b4dd0785", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "measure(Total Cases)", + "expression": "MEASURE(`Total Cases`)" + }, + { + "name": "channel", + "expression": "`channel`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "frame": { + "showTitle": true, + "title": "Cases by channel", + "description": "Distribution of support cases across intake channels." + }, + "widgetType": "pie", + "encodings": { + "angle": { + "fieldName": "measure(Total Cases)", + "scale": { + "type": "quantitative" + }, + "displayName": "Cases" + }, + "color": { + "fieldName": "channel", + "displayName": "Channel", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "Email", + "color": "#FF7054" + }, + { + "value": "Chat", + "color": "#FFA600" + }, + { + "value": "Phone", + "color": "#DE5582" + }, + { + "value": "Web Form", + "color": "#995495" + } + ] + } + }, + "label": { + "show": true + } + } + } + }, + "position": { + "x": 8, + "y": 12, + "width": 4, + "height": 5 + } } - ] + ], + "pageType": "PAGE_TYPE_CANVAS", + "layoutVersion": "GRID_V1" }, - { "name": "filters", "displayName": "Filters", - "pageType": "PAGE_TYPE_GLOBAL_FILTERS", - "layoutVersion": "GRID_V1", "layout": [ { "widget": { "name": "filter-date", - "queries": [{ - "name": "ds_date", - "query": { - "datasetName": "ds_support", - "fields": [{"name": "opened_at", "expression": "`opened_at`"}], - "disaggregated": false + "queries": [ + { + "name": "ds_date", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "opened_at", + "expression": "`opened_at`" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "filter-date-range-picker", - "encodings": {"fields": [{"fieldName": "opened_at", "queryName": "ds_date"}]}, - "frame": {"showTitle": 
true, "title": "Date"} + "version": 2, + "widgetType": "filter-date-range-picker", + "encodings": { + "fields": [ + { + "fieldName": "opened_at", + "queryName": "ds_date" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Date" + } } }, - "position": {"x": 0, "y": 0, "width": 4, "height": 2} + "position": { + "x": 0, + "y": 0, + "width": 4, + "height": 2 + } }, { "widget": { "name": "filter-region", - "queries": [{ - "name": "ds_region", - "query": { - "datasetName": "ds_support", - "fields": [{"name": "region_name", "expression": "`region_name`"}], - "disaggregated": false + "queries": [ + { + "name": "ds_region", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "region_name", + "expression": "`region_name`" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "filter-multi-select", - "encodings": {"fields": [{"fieldName": "region_name", "queryName": "ds_region", "displayName": "Region"}]}, - "frame": {"showTitle": true, "title": "Region"} + "version": 2, + "widgetType": "filter-multi-select", + "encodings": { + "fields": [ + { + "fieldName": "region_name", + "queryName": "ds_region", + "displayName": "Region" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Region" + } } }, - "position": {"x": 4, "y": 0, "width": 4, "height": 2} + "position": { + "x": 4, + "y": 0, + "width": 4, + "height": 2 + } }, { "widget": { "name": "filter-resolution-time", - "queries": [{ - "name": "ds_resolution", - "query": { - "datasetName": "ds_support", - "fields": [ - {"name": "min(time_to_resolution_hours)", "expression": "MIN(`time_to_resolution_hours`)"}, - {"name": "max(time_to_resolution_hours)", "expression": "MAX(`time_to_resolution_hours`)"} - ], - "disaggregated": false + "queries": [ + { + "name": "ds_resolution", + "query": { + "datasetName": "ds_support", + "fields": [ + { + "name": "min(time_to_resolution_hours)", + "expression": "MIN(`time_to_resolution_hours`)" + }, + { + "name": 
"max(time_to_resolution_hours)", + "expression": "MAX(`time_to_resolution_hours`)" + } + ], + "disaggregated": false + } } - }], + ], "spec": { - "version": 2, "widgetType": "range-slider", - "encodings": {"fields": [{"fieldName": "time_to_resolution_hours", "queryName": "ds_resolution"}]}, - "frame": {"showTitle": true, "title": "Resolution time (hrs)"} + "version": 2, + "widgetType": "range-slider", + "encodings": { + "fields": [ + { + "fieldName": "time_to_resolution_hours", + "queryName": "ds_resolution" + } + ] + }, + "frame": { + "showTitle": true, + "title": "Resolution time (hrs)" + } } }, - "position": {"x": 8, "y": 0, "width": 4, "height": 2} + "position": { + "x": 8, + "y": 0, + "width": 4, + "height": 2 + } } - ] + ], + "pageType": "PAGE_TYPE_GLOBAL_FILTERS", + "layoutVersion": "GRID_V1" } ], - "uiSettings": { "theme": { - "canvasBackgroundColor": {"light": "#FFFFFF", "dark": "#1F272D"}, - "widgetBackgroundColor": {"light": "#FFFFFF", "dark": "#11171C"}, - "widgetBorderColor": {"light": "#FFFFFF", "dark": "#11171C"}, - "fontColor": {"light": "#11171C", "dark": "#E8ECF0"}, - "selectionColor": {"light": "#2272B4", "dark": "#8ACAFF"}, + "canvasBackgroundColor": { + "light": "#FCFCFC", + "dark": "#1F272D" + }, + "widgetBackgroundColor": { + "light": "#FFFFFF", + "dark": "#11171C" + }, + "fontColor": { + "light": "#11171C", + "dark": "#E8ECF0" + }, + "selectionColor": { + "light": "#2272B4", + "dark": "#8ACAFF" + }, "visualizationColors": [ - "#3B82F6", - "#10B981", - "#F59E0B", - "#8B5CF6", - "#14B8A6", - "#EF4444", - "#6B7280" + "#FFA600", + "#FF7054", + "#DE5582", + "#995495", + "#4E5185", + "#1D425C", + "#99DDB4" ], "widgetHeaderAlignment": "LEFT" } @@ -492,7 +990,7 @@ y=14: Detail table (w=12, h=6) } ``` -The palette puts the Critical-red at `position: 6` (`#EF4444`) and the warning-amber at `position: 3` (`#F59E0B`) — that's what the pivot conditional rules and the forecast annotation reference. 
Position 1 (blue) is the default for chart series that don't pin a value. +This is the "warm sunset" family used in the live Customer Support dashboard — amber → coral → pink → purple → navy, plus a mint-green at position 6 (0-indexed) for "good/safe" semantic use. The categorical palette covers chart series; **the alert/critical color (`#FF7E5C`) is pinned as a literal `hex` in the conditional-cell rules and the annotation** (NOT a palette position), so semantic meaning holds even if the palette is reshuffled later. ## What each widget demonstrates diff --git a/databricks-skills/databricks-aibi-dashboards/SKILL.md b/databricks-skills/databricks-aibi-dashboards/SKILL.md index e2edccdc..24ab5b5f 100644 --- a/databricks-skills/databricks-aibi-dashboards/SKILL.md +++ b/databricks-skills/databricks-aibi-dashboards/SKILL.md @@ -32,26 +32,26 @@ A dashboard should be showing something relevant for a human, typically some KPI | Widget Type | Version | Documented in | |-------------|---------|---------------| -| `counter` (KPI + sparkline + comparison) | **2** | [1-widget-specifications.md#counter-kpi](1-widget-specifications.md#counter-kpi) (L71) | -| `table` | **2** | [1-widget-specifications.md#table](1-widget-specifications.md#table) (L235) | -| `bar`, `line` | **3** | [1-widget-specifications.md#line--bar-charts](1-widget-specifications.md#line--bar-charts) (L310) | -| `pie` | **3** | [1-widget-specifications.md#pie-chart](1-widget-specifications.md#pie-chart) (L422) | -| text (markdown, no spec block) | N/A | [1-widget-specifications.md#text-headersdescriptions](1-widget-specifications.md#text-headersdescriptions) (L33) | +| text (markdown, no spec block) | N/A | [1-widget-specifications.md#text-headersdescriptions](1-widget-specifications.md#text-headersdescriptions) (L35) | +| `counter` (KPI + sparkline + comparison) | **2** | [1-widget-specifications.md#counter-kpi](1-widget-specifications.md#counter-kpi) (L73) | +| `table` | **2** | 
[1-widget-specifications.md#table](1-widget-specifications.md#table) (L242) | +| `bar`, `line` | **3** | [1-widget-specifications.md#line--bar-charts](1-widget-specifications.md#line--bar-charts) (L317) | +| `pie` | **3** | [1-widget-specifications.md#pie-chart](1-widget-specifications.md#pie-chart) (L429) | +| `symbol-map` (lat/lon point map) | **2** | [1-widget-specifications.md#symbol-map-bubble-map](1-widget-specifications.md#symbol-map-bubble-map) (L451) | | `area` | **3** | [2-advanced-widget-specifications.md#area-chart](2-advanced-widget-specifications.md#area-chart) (L7) | | `scatter` | **3** | [2-advanced-widget-specifications.md#scatter-plot--bubble-chart](2-advanced-widget-specifications.md#scatter-plot--bubble-chart) (L34) | | `combo` (bar+line, dual-axis) | **1** | [2-advanced-widget-specifications.md#combo-chart-bar--line](2-advanced-widget-specifications.md#combo-chart-bar--line) (L57) | -| `forecast-line` (with `AI_FORECAST` SQL) | **1** | [2-advanced-widget-specifications.md#forecast-line-with-ai_forecast](2-advanced-widget-specifications.md#forecast-line-with-ai_forecast) (L166) | -| `pivot` (with conditional cell rules) | **3** | [2-advanced-widget-specifications.md#pivot](2-advanced-widget-specifications.md#pivot) (L241) | -| `histogram` (with `bin(col, binWidth=N)`) | **3** | [2-advanced-widget-specifications.md#histogram](2-advanced-widget-specifications.md#histogram) (L302) | -| `heatmap` | **3** | [2-advanced-widget-specifications.md#heatmap](2-advanced-widget-specifications.md#heatmap) (L398) | -| `funnel` | **1** | [2-advanced-widget-specifications.md#funnel](2-advanced-widget-specifications.md#funnel) (L424) | -| `sankey` | **1** | [2-advanced-widget-specifications.md#sankey](2-advanced-widget-specifications.md#sankey) (L338) | -| `box` | **1** | [2-advanced-widget-specifications.md#box](2-advanced-widget-specifications.md#box) (L448) | -| `waterfall` | **1** | 
[2-advanced-widget-specifications.md#waterfall](2-advanced-widget-specifications.md#waterfall) (L470) | | `choropleth-map` (regions colored by value) | **1** | [2-advanced-widget-specifications.md#choropleth-map](2-advanced-widget-specifications.md#choropleth-map) (L109) | -| `symbol-map` (lat/lon point map) | **2** | [2-advanced-widget-specifications.md#symbol-map-point-map](2-advanced-widget-specifications.md#symbol-map-point-map) (L364) | +| `forecast-line` (with `AI_FORECAST` SQL) | **1** | [2-advanced-widget-specifications.md#forecast-line-with-ai_forecast](2-advanced-widget-specifications.md#forecast-line-with-ai_forecast) (L166) | +| `pivot` (with conditional cell rules) | **3** | [2-advanced-widget-specifications.md#pivot](2-advanced-widget-specifications.md#pivot) (L260) | +| `histogram` (with `bin(col, binWidth=N)`) | **3** | [2-advanced-widget-specifications.md#histogram](2-advanced-widget-specifications.md#histogram) (L321) | +| `sankey` | **1** | [2-advanced-widget-specifications.md#sankey](2-advanced-widget-specifications.md#sankey) (L357) | +| `heatmap` | **3** | [2-advanced-widget-specifications.md#heatmap](2-advanced-widget-specifications.md#heatmap) (L383) | +| `funnel` | **1** | [2-advanced-widget-specifications.md#funnel](2-advanced-widget-specifications.md#funnel) (L411) | +| `box` | **1** | [2-advanced-widget-specifications.md#box](2-advanced-widget-specifications.md#box) (L435) | +| `waterfall` | **1** | [2-advanced-widget-specifications.md#waterfall](2-advanced-widget-specifications.md#waterfall) (L457) | | `filter-single-select`, `filter-multi-select`, `filter-date-range-picker` | **2** | [3-filters.md#filter-widget-structure](3-filters.md#filter-widget-structure) (L32) | -| `range-slider` | **2** | [3-filters.md#range-slider-numeric-range-filter](3-filters.md#range-slider-numeric-range-filter) (L239) | +| `range-slider` | **2** | [3-filters.md#range-slider-numeric-range-filter](3-filters.md#range-slider-numeric-range-filter) (L282) | > 
Cohort retention charts are built as a `pivot` with a color-scale cell style — there is no `cohort` widget type. See pivot in [2-advanced-widget-specifications.md](2-advanced-widget-specifications.md). @@ -214,9 +214,9 @@ Every dashboard's `serialized_dashboard` content must follow this exact structur - `pageType`: Required on every page (`PAGE_TYPE_CANVAS` or `PAGE_TYPE_GLOBAL_FILTERS`) - Query binding: `query.fields[].name` must exactly match `encodings.*.fieldName` -### Dashboard Theme (Optional) +### Theme & Color (always set this — it makes or breaks the dashboard) -Top-level `uiSettings.theme` controls colors, fonts, and widget chrome across every widget on the dashboard. Without it, the dashboard inherits the workspace default. With it, you get a consistent palette across all charts, plus the index that `{"themeColorType": "visualizationColors", "position": N}` in per-widget specs resolves against. +Top-level `uiSettings.theme` controls colors, fonts, and widget chrome across every widget on the dashboard. Without it, the dashboard inherits the workspace default and looks generic. **Set the full block on every dashboard you create** — a coherent palette is the single highest-impact polish item. ```json { @@ -224,9 +224,8 @@ Top-level `uiSettings.theme` controls colors, fonts, and widget chrome across ev "pages": [...], "uiSettings": { "theme": { - "canvasBackgroundColor": {"light": "#FFFFFF", "dark": "#1F272D"}, + "canvasBackgroundColor": {"light": "#FCFCFC", "dark": "#1F272D"}, "widgetBackgroundColor": {"light": "#FFFFFF", "dark": "#11171C"}, - "widgetBorderColor": {"light": "#FFFFFF", "dark": "#11171C"}, "fontColor": {"light": "#11171C", "dark": "#E8ECF0"}, "selectionColor": {"light": "#2272B4", "dark": "#8ACAFF"}, "visualizationColors": [ @@ -239,11 +238,32 @@ Top-level `uiSettings.theme` controls colors, fonts, and widget chrome across ev } ``` -- `visualizationColors` is the **ordered palette** chart series and category mappings cycle through. 
`position: 1` is the first color (`#FFA600` above), `position: 6` is the 6th (`#1D425C`). Length is whatever you want — 5-8 colors is typical. -- Background / font / selection colors take `light` + `dark` pairs; the dashboard automatically applies the right pair based on the viewer's mode. -- `widgetBorderColor`: set this to the **same value as `widgetBackgroundColor`** (as in the example) to hide widget borders — the default border looks busy in dense dashboards and most demos look cleaner without it. +**Theme keys** (mechanics): + +- `visualizationColors`: ordered palette every chart series and category mapping cycles through. **Positions are 0-indexed**: `position: 0` = first color (`#FFA600` above), `position: 6` = seventh (`#99DDB4`). Length 5–8 is typical. +- Background / font / selection colors take `light` + `dark` pairs; the dashboard auto-selects based on viewer mode. - `widgetHeaderAlignment`: `"LEFT"` (default), `"CENTER"`, or `"RIGHT"`. -- Per-widget color references use `{"themeColorType": "visualizationColors", "position": N}` to pin a value to a specific slot in this palette (e.g., Critical → position 6 → always red, regardless of how the chart sorts). For an exact hex outside the palette, use `{"hex": "#FF0000"}` instead. +- Per-widget color references: `{"themeColorType": "visualizationColors", "position": N}` (0-indexed) to pin to a palette slot, or `{"hex": "#FF0000"}` for an exact color outside the palette. + +**Palette-design rules** (this is what separates a polished dashboard from a noisy one): + +1. **One coherent color family per dashboard, distinct across the suite.** Walk **across hues** (e.g., amber → coral → pink → purple → navy), not one color faded toward white — a single-hue lightness ramp reads as one color and the viewer can't tell categories apart. Adjacent stops must be visually distinct: if you squint and two blur into one, push them further apart. 
Single-hue ramps are for **quantitative** widgets only (`colorRamp.mode: "custom-sequential"`), never for `visualizationColors`. +2. **Pin semantic colors as literal hex, independent of palette position.** "Bad" = a warm coral (e.g. `#FF7E5C`), "good" = a calm teal/green. Use `color.scale.mappings` with `{"hex": "..."}` — **not** `themeColorType: position`, because palette reshuffles silently swap palette-position colors. For "good", copy the hex of the teal/green that's already in the palette (as a literal value, not a position reference) so it never clashes. +3. **Color non-categorical widgets explicitly so they join the family.** Maps & heatmaps: `colorRamp.mode: "custom-sequential"` with `{start, end}` from the family (if directional: `start` = bad color, `end` = good color). Forecast / multi-series: pin per-series via `color.scale.mappings` keyed on `displayName` (actual = solid family color, forecast = contrast/alert, threshold = muted tone). Sparkline counters: set `value.color` to a family color, not grey. +4. **"Lighter / more pastel" tweak**: nudge all stops up in lightness *together*; don't recolor individual ones. Re-sync the pinned semantic hex values; keep enough contrast on the alert color that it still reads as a warning. + +**Starter palettes** (pick one and adapt — extend to 7-8 stops if needed; semantic red/green stay as literal hex per rule 2): + +``` +#094074 #3C6997 #5ADBFF #FFDD4A #FE9000 +#003F5C #594E90 #BC4C96 #FF5F66 #FFA600 +#4A8CC7 #F59770 #FFD84A #F0E09E #6DD980 +#440154 #3B528B #21918C #5EC962 #FDE725 +#4E79A7 #F28E2C #E15759 #76B7B2 #59A14F +#0072B2 #E69F00 #009E73 #CC79A7 #D55E00 +#0D0887 #7E03A8 #CC4778 #F89441 #F0F921 +#6929C4 #1192E8 #005D5D #9F1853 #FA4D56 +``` ### Linking a Genie Space (Optional)