diff --git a/examples/metrics/grafana.json b/examples/metrics/grafana.json new file mode 100644 index 00000000..72d17597 --- /dev/null +++ b/examples/metrics/grafana.json @@ -0,0 +1,1025 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Monitoring UnifiedCache Connector Service (load/save)", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Hit rates of ucm lookup requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:interval_lookup_hit_rates_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:interval_lookup_hit_rates_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Average", + "range": true, + "refId": "A" + } + ], + "title": "Connector Interval Lookup Hit Rates", + "type": "timeseries" + }, + + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of load requests each start_load_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:load_requests_num_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:load_requests_num_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Load Requests Num", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of load blocks each start_load_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"rate(ucm:load_blocks_num_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:load_blocks_num_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Load Blocks Num", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "P50, P90, P95, P99 and Average load duration in milliseconds for each start_load_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"rate(ucm:load_duration_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:load_duration_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Load Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "P50, P90, P95, P99 and Average load speed in GB/s for each start_load_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.005 + }, + { + "color": "green", + "value": 0.01 + } + ] + }, + "unit": "gb/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"rate(ucm:load_speed_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:load_speed_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Load Speed", + "type": "timeseries" + }, + + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of save requests each wait_for_save.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:save_requests_num_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:save_requests_num_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": 
false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Save Requests Num", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of save blocks each wait_for_save.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:save_blocks_num_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:save_blocks_num_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Save Blocks Num", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "P50, P90, P95, P99 and Average save duration in milliseconds for each save_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 8000 + }, + { + "color": "red", + "value": 15000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:save_duration_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:save_duration_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Save Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "P50, P90, P95, P99 
and Average save speed in GB/s for each save_kv.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.004 + }, + { + "color": "green", + "value": 0.008 + } + ] + }, + "unit": "gb/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(ucm:save_speed_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(ucm:save_speed_count{model_name=\"$model_name\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "worker-{{worker_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Connector Save Speed", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "prometheus", + "value": "edx8memhpd9tsa" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + 
"name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "/share/datasets/public_models/Meta-Llama-3-8B-Instruct", + "value": "/share/datasets/public_models/Meta-Llama-3-8B-Instruct" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(model_name)", + "hide": 0, + "includeAll": false, + "label": "model_name", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "query": "label_values(model_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "vLLM - UnifiedCache Connector Monitoring", + "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b", + "version": 9, + "weekStart": "" +} \ No newline at end of file diff --git a/examples/metrics/metrics_configs.yaml b/examples/metrics/metrics_configs.yaml new file mode 100644 index 00000000..5ed07baa --- /dev/null +++ b/examples/metrics/metrics_configs.yaml @@ -0,0 +1,56 @@ +# Prometheus Metrics Configuration +# This file defines which metrics should be enabled and their configurations +log_interval: 5 # Interval in seconds for logging metrics + +prometheus: + multiproc_dir: "/vllm-workspace" # Directory for Prometheus multiprocess mode + + metric_prefix: "ucm:" + + # Enable/disable metrics by category + enabled_metrics: + counters: true + gauges: true + histograms: true + + # Counter metrics configuration + # counters: + # - name: "received_requests" + # documentation: "Total number of requests sent to ucm" + + # Gauge metrics configuration + # gauges: + # - name: "lookup_hit_rate" + # documentation: "Hit rate of ucm lookup requests since last log" + # multiprocess_mode: "livemostrecent" + + # Histogram 
metrics configuration + histograms: + - name: "load_requests_num" + documentation: "Number of requests loaded from ucm" + buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] + - name: "load_blocks_num" + documentation: "Number of blocks loaded from ucm" + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] + - name: "load_duration" + documentation: "Time to load from ucm (ms)" + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] + - name: "load_speed" + documentation: "Speed of loading from ucm (GB/s)" + buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] + - name: "save_requests_num" + documentation: "Number of requests saved to ucm" + buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] + - name: "save_blocks_num" + documentation: "Number of blocks saved to ucm" + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] + - name: "save_duration" + documentation: "Time to save to ucm (ms)" + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] + - name: "save_speed" + documentation: "Speed of saving to ucm (GB/s)" + buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] + - name: "interval_lookup_hit_rates" + documentation: "Hit rates of ucm lookup requests" + buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] + diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml index b7207942..e9189941 100644 --- a/examples/ucm_config_example.yaml +++ b/examples/ucm_config_example.yaml @@ -16,6 +16,8 @@ ucm_connectors: load_only_first_rank: false +metrics_config_path: "/vllm-workspace/metrics_configs.yaml" + # Sparse attention configuration # Format 1: 
Dictionary format (for methods like ESA, KvComp) # ucm_sparse_config: diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py index 3809f6fd..820267ac 100644 --- a/ucm/integration/vllm/ucm_connector.py +++ b/ucm/integration/vllm/ucm_connector.py @@ -2,6 +2,7 @@ import itertools import os import pickle +import time from dataclasses import dataclass, field from typing import TYPE_CHECKING, Callable, List, Optional @@ -18,6 +19,8 @@ from vllm.v1.request import Request from ucm.logger import init_logger +from ucm.shared.metrics import ucmmonitor +from ucm.shared.metrics.observability import UCMStatsLogger from ucm.store.factory import UcmConnectorFactory from ucm.store.ucmstore import Task, UcmKVStoreBase from ucm.utils import Config @@ -127,6 +130,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): self.broadcast_fn = self.group_coordinator.broadcast self.broadcast_stream = torch.cuda.Stream() + logger.info(f"self.launch_config: {self.launch_config}") connector_configs = self.launch_config.get("ucm_connectors", []) assert len(connector_configs) > 0, "no storage connector name in config." 
@@ -153,6 +157,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): 1 if self.is_mla else num_head_per_tp ) self.store = UcmConnectorFactory.create_connector(name, config) + self.block_data_size = config["kv_block_size"] logger.info("init UCConnectorImpl, connector: %s", name) logger.info( @@ -161,6 +166,20 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): config["io_size"] / 1024, ) + self.metrics_config = self.launch_config.get("metrics_config_path", "") + if self.metrics_config: + self.stats_logger = UCMStatsLogger( + vllm_config.model_config.served_model_name, + self.rank, + self.metrics_config, + ) + self.monitor = ucmmonitor.StatsMonitor.get_instance() + self.synchronize = ( + torch.cuda.synchronize + if current_platform.is_cuda_alike() + else torch.npu.synchronize + ) + def generate_hash(self, block_size: int, request: "Request") -> list[str]: token_ids = request.all_token_ids @@ -209,6 +228,11 @@ def get_num_new_matched_tokens( f"hit hbm: {hbm_hit_block_num}, " f"hit external: {external_hit_blocks}" ) + if self.metrics_config: + self.monitor.update_stats( + "ConnStats", + {"interval_lookup_hit_rates": external_hit_blocks / len(ucm_block_ids)}, + ) total_hit_block_num = hbm_hit_block_num + external_hit_blocks @@ -452,7 +476,6 @@ def _broadcast(self, dst_tensor_addr: list[torch.Tensor]): tensor.copy_(rec_tensor[i]) def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: - metadata = self._get_connector_metadata() assert isinstance(metadata, UCMConnectorMetadata) @@ -460,9 +483,16 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: request_to_task: dict[str, Optional[Task]] = {} req_broadcast_addr = {} + is_load = False + num_loaded_block = 0 + num_loaded_request = 0 + load_start_time = time.perf_counter() * 1000 for request_id, request in metadata.request_meta.items(): if len(request.load_block_ids[0]) == 0: continue + is_load = True + num_loaded_block += 
len(request.load_block_ids[0]) + num_loaded_request += 1 ucm_block_ids, vllm_block_ids = request.load_block_ids if self.rank != 0 and not self.is_mla: @@ -486,6 +516,24 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: logger.error(f"request {request_id} load kv cache failed.") if self.load_only_first_rank: self._broadcast(req_broadcast_addr[request_id]) + load_end_time = time.perf_counter() * 1000 + load_speed = ( + num_loaded_block + * self.block_data_size + / (load_end_time - load_start_time) + / 1024 + / 1024 + ) # GB/s + if self.metrics_config and is_load: + self.monitor.update_stats( + "ConnStats", + { + "load_requests_num": num_loaded_request, + "load_blocks_num": num_loaded_block, + "load_duration": load_end_time - load_start_time, + "load_speed": load_speed, + }, + ) def wait_for_layer_load(self, layer_name: str) -> None: pass @@ -503,15 +551,24 @@ def wait_for_save(self) -> None: if (self.is_mla or self.is_dsa) and self.rank != 0: return + if self.metrics_config: + self.synchronize() metadata = self._get_connector_metadata() assert isinstance(metadata, UCMConnectorMetadata) request_to_task: dict[str, Task] = {} request_to_blocks: dict[str, list[str]] = {} + is_save = False + num_saved_block = 0 + num_saved_request = 0 + save_start_time = time.perf_counter() * 1000 for request_id, request in metadata.request_meta.items(): if len(request.dump_block_ids[0]) == 0: continue + is_save = True + num_saved_block += len(request.dump_block_ids[0]) + num_saved_request += 1 ucm_block_ids, vllm_block_ids = request.dump_block_ids if self.rank != 0: @@ -546,6 +603,24 @@ def wait_for_save(self) -> None: else: logger.error(f"request {request_id} dump kv cache failed.") self.store.commit(ucm_block_ids, False) + save_end_time = time.perf_counter() * 1000 + save_speed = ( + num_saved_block + * self.block_data_size + / (save_end_time - save_start_time) + / 1024 + / 1024 + ) # GB/s + if self.metrics_config and is_save: + self.monitor.update_stats( 
+ "ConnStats", + { + "save_requests_num": num_saved_request, + "save_blocks_num": num_saved_block, + "save_duration": save_end_time - save_start_time, + "save_speed": save_speed, + }, + ) def clear_connector_metadata(self) -> None: super().clear_connector_metadata() diff --git a/ucm/shared/CMakeLists.txt b/ucm/shared/CMakeLists.txt index 3952830c..f44b8522 100644 --- a/ucm/shared/CMakeLists.txt +++ b/ucm/shared/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(vendor) add_subdirectory(trans) +add_subdirectory(metrics) add_subdirectory(test) diff --git a/ucm/shared/metrics/CMakeLists.txt b/ucm/shared/metrics/CMakeLists.txt new file mode 100644 index 00000000..3933b9f0 --- /dev/null +++ b/ucm/shared/metrics/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB_RECURSE CORE_SRCS CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/cc/stats/*.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/cc/*.cc") +add_library(monitor_static STATIC ${CORE_SRCS}) +set_property(TARGET monitor_static PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(monitor_static PUBLIC + $ + $) +set_target_properties(monitor_static PROPERTIES OUTPUT_NAME monitor) + +file(GLOB_RECURSE BINDINGS_SRCS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/cpy/*.cc") +pybind11_add_module(ucmmonitor ${BINDINGS_SRCS}) +target_link_libraries(ucmmonitor PRIVATE -Wl,--whole-archive monitor_static -Wl,--no-whole-archive) +target_include_directories(ucmmonitor PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cc) +set_target_properties(ucmmonitor PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/ucm/shared/metrics/cc/stats/conn_stats.cc b/ucm/shared/metrics/cc/stats/conn_stats.cc new file mode 100644 index 00000000..a2aafa70 --- /dev/null +++ b/ucm/shared/metrics/cc/stats/conn_stats.cc @@ -0,0 +1,79 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * */ +#include "conn_stats.h" + +namespace UC::Metrics { + +ConnStats::ConnStats() = default; + +std::string ConnStats::Name() const { + return "ConnStats"; +} + +void ConnStats::Reset() { + for (auto& v : data_) v.clear(); +} + +void ConnStats::Update(const std::unordered_map& params) { + for (const auto& [k, v] : params) { + Key id = KeyFromString(k); + if (id == Key::COUNT) continue; + EmplaceBack(id, v); + } +} + +std::unordered_map> ConnStats::Data() { + std::unordered_map> result; + result["save_requests_num"] = data_[static_cast(Key::save_requests_num)]; + result["save_blocks_num"] = data_[static_cast(Key::save_blocks_num)]; + result["save_duration"] = data_[static_cast(Key::save_duration)]; + result["save_speed"] = data_[static_cast(Key::save_speed)]; + result["load_requests_num"] = data_[static_cast(Key::load_requests_num)]; + result["load_blocks_num"] = data_[static_cast(Key::load_blocks_num)]; + result["load_duration"] = data_[static_cast(Key::load_duration)]; + result["load_speed"] = data_[static_cast(Key::load_speed)]; + result["interval_lookup_hit_rates"] = data_[static_cast(Key::interval_lookup_hit_rates)]; + return result; +} + +Key ConnStats::KeyFromString(const std::string& k) { + if (k == "save_requests_num") return Key::save_requests_num; + if (k == "save_blocks_num") return Key::save_blocks_num; + if (k == "save_duration") return Key::save_duration; + if (k == "save_speed") return Key::save_speed; + if (k == "load_requests_num") return Key::load_requests_num; + if (k == "load_blocks_num") return Key::load_blocks_num; + if (k == "load_duration") return Key::load_duration; + if (k == "load_speed") return Key::load_speed; + if (k == "interval_lookup_hit_rates")return Key::interval_lookup_hit_rates; + return Key::COUNT; +} + +void ConnStats::EmplaceBack(Key id, double value) { + data_[static_cast(id)].push_back(value); +} + +static Registrar registrar; + +} \ No newline at end of file diff --git a/ucm/shared/metrics/cc/stats/conn_stats.h 
b/ucm/shared/metrics/cc/stats/conn_stats.h new file mode 100644 index 00000000..34a76a68 --- /dev/null +++ b/ucm/shared/metrics/cc/stats/conn_stats.h @@ -0,0 +1,78 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * */
+#ifndef UNIFIEDCACHE_CONNSTATS_H
+#define UNIFIEDCACHE_CONNSTATS_H
+
+#include "istats.h"
+#include "stats_registry.h"
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace UC::Metrics {
+
+// Index of each connector metric inside ConnStats' sample storage.
+// COUNT is not a metric: it is the number of real keys and doubles as the
+// "unknown name" result of ConnStats::KeyFromString.
+enum class Key : uint8_t {
+    interval_lookup_hit_rates = 0,
+    save_requests_num,
+    save_blocks_num,
+    save_duration,
+    save_speed,
+    load_requests_num,
+    load_blocks_num,
+    load_duration,
+    load_speed,
+    COUNT
+};
+
+// IStats implementation that accumulates raw samples for the connector
+// load/save metrics listed in Key, one vector of doubles per key.
+class ConnStats : public IStats {
+public:
+    ConnStats();
+    ~ConnStats() override = default;
+
+    std::string Name() const override;
+    void Reset() override;
+    void Update(const std::unordered_map<std::string, double>& params) override;
+    std::unordered_map<std::string, std::vector<double>> Data() override;
+
+private:
+    // Number of real keys; sizes data_.
+    static constexpr std::size_t N = static_cast<std::size_t>(Key::COUNT);
+    // data_[k] holds the samples recorded for Key k since the last Reset().
+    std::array<std::vector<double>, N> data_;
+
+    static Key KeyFromString(const std::string& k);
+    void EmplaceBack(Key id, double value);
+};
+
+// Constructing a Registrar registers a ConnStats factory under the name
+// "ConnStats" with StatsRegistry.
+struct Registrar {
+    Registrar() {
+        StatsRegistry::RegisterStats("ConnStats", []() -> std::unique_ptr<IStats> {
+            return std::make_unique<ConnStats>();
+        });
+    }
+};
+
+}
+
+#endif // UNIFIEDCACHE_CONNSTATS_H
\ No newline at end of file
diff --git a/ucm/shared/metrics/cc/stats/istats.h b/ucm/shared/metrics/cc/stats/istats.h
new file mode 100644
index 00000000..56a6e8e1
--- /dev/null
+++ b/ucm/shared/metrics/cc/stats/istats.h
@@ -0,0 +1,45 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#ifndef UNIFIEDCACHE_ISTATS_H
+#define UNIFIEDCACHE_ISTATS_H
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace UC::Metrics {
+
+// Interface of one named group of metrics.
+class IStats {
+public:
+    virtual ~IStats() = default;
+
+    // Name identifying this stats group.
+    virtual std::string Name() const = 0;
+
+    // Records one value per metric name contained in `params`.
+    virtual void Update(const std::unordered_map<std::string, double>& params) = 0;
+
+    // Discards everything recorded so far.
+    virtual void Reset() = 0;
+
+    // Returns the recorded values, keyed by metric name.
+    virtual std::unordered_map<std::string, std::vector<double>> Data() = 0;
+};
+
+}
+
+#endif // UNIFIEDCACHE_ISTATS_H
\ No newline at end of file
diff --git a/ucm/shared/metrics/cc/stats_monitor.cc b/ucm/shared/metrics/cc/stats_monitor.cc
new file mode 100644
index 00000000..8b83920c
--- /dev/null
+++ b/ucm/shared/metrics/cc/stats_monitor.cc
@@ -0,0 +1,80 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#include "stats/istats.h"
+#include "stats_registry.h"
+#include "stats_monitor.h"
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace UC::Metrics {
+
+// Instantiates one stats object for every name currently registered.
+// Names whose creator yields nothing are skipped so the map never holds null.
+StatsMonitor::StatsMonitor() {
+    auto& registry = StatsRegistry::GetInstance();
+    for (const auto& name : registry.GetRegisteredStatsNames()) {
+        if (auto stats = registry.CreateStats(name)) {
+            stats_map_[name] = std::move(stats);
+        }
+    }
+}
+
+// (Re)creates the stats object for `name`. An unregistered name is a no-op:
+// storing the registry's nullptr would make later calls dereference null.
+void StatsMonitor::CreateStats(const std::string& name) {
+    auto stats = StatsRegistry::GetInstance().CreateStats(name);
+    if (!stats) return;
+    std::lock_guard<std::mutex> lock(mutex_);
+    stats_map_[name] = std::move(stats);
+}
+
+// Returns a snapshot of the samples held for `name`, or an empty map when no
+// such stats object exists. Uses find() rather than operator[] so an unknown
+// name neither inserts an entry nor dereferences a null pointer.
+std::unordered_map<std::string, std::vector<double>> StatsMonitor::GetStats(const std::string& name) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    auto it = stats_map_.find(name);
+    if (it == stats_map_.end() || !it->second) return {};
+    return it->second->Data();
+}
+
+// Clears the samples held for `name`; unknown names are ignored.
+void StatsMonitor::ResetStats(const std::string& name) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    auto it = stats_map_.find(name);
+    if (it != stats_map_.end() && it->second) it->second->Reset();
+}
+
+// Snapshot followed by reset under a single lock, so no sample recorded
+// through this monitor can fall between the two steps. Unknown names yield an
+// empty map.
+std::unordered_map<std::string, std::vector<double>> StatsMonitor::GetStatsAndClear(const std::string& name) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    auto it = stats_map_.find(name);
+    if (it == stats_map_.end() || !it->second) return {};
+    auto result = it->second->Data();
+    it->second->Reset();
+    return result;
+}
+
+// Forwards `params` to the stats object registered as `name`; unknown names
+// are ignored.
+void StatsMonitor::UpdateStats(
+    const std::string& name,
+    const std::unordered_map<std::string, double>& params)
+{
+    std::lock_guard<std::mutex> lock(mutex_);
+    auto it = stats_map_.find(name);
+    if (it != stats_map_.end() && it->second) {
+        it->second->Update(params);
+    }
+}
+
+// Clears the samples of every stats object.
+void StatsMonitor::ResetAllStats() {
+    std::lock_guard<std::mutex> lock(mutex_);
+    for (auto& [n, ptr] : stats_map_) {
+        if (ptr) ptr->Reset();
+    }
+}
+
+}
\ No newline at end of file
diff --git a/ucm/shared/metrics/cc/stats_monitor.h b/ucm/shared/metrics/cc/stats_monitor.h new
file mode 100644 index 00000000..b8bd688a --- /dev/null +++ b/ucm/shared/metrics/cc/stats_monitor.h @@ -0,0 +1,71 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * */
+#ifndef UNIFIEDCACHE_MONITOR_H
+#define UNIFIEDCACHE_MONITOR_H
+
+#include "stats/istats.h"
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace UC::Metrics {
+
+// Process-wide holder of the live IStats objects, keyed by stats name.
+// Every public member function takes mutex_ for its whole duration.
+class StatsMonitor {
+public:
+
+    // Returns the single instance, constructed on first use.
+    static StatsMonitor& GetInstance() {
+        static StatsMonitor inst;
+        return inst;
+    }
+
+    ~StatsMonitor() = default;
+
+    // (Re)creates the stats object for `name` through StatsRegistry.
+    void CreateStats(const std::string& name);
+
+    // Returns the samples currently held for `name`, keyed by metric name.
+    std::unordered_map<std::string, std::vector<double>>
+    GetStats(const std::string& name);
+
+    // Clears the samples held for `name`.
+    void ResetStats(const std::string& name);
+
+    // Returns the samples held for `name` and clears them in the same
+    // critical section.
+    std::unordered_map<std::string, std::vector<double>>
+    GetStatsAndClear(const std::string& name);
+
+    // Records `params` (metric name -> value) into the stats object `name`.
+    void UpdateStats(const std::string& name,
+                     const std::unordered_map<std::string, double>& params);
+
+    // Clears the samples of every stats object.
+    void ResetAllStats();
+
+private:
+    std::mutex mutex_;
+    std::unordered_map<std::string, std::unique_ptr<IStats>> stats_map_;
+
+    // Singleton: constructed only by GetInstance(); not copyable.
+    StatsMonitor();
+    StatsMonitor(const StatsMonitor&) = delete;
+    StatsMonitor& operator=(const StatsMonitor&) = delete;
+};
+
+}
+
+#endif // UNIFIEDCACHE_MONITOR_H
\ No newline at end of file
diff --git a/ucm/shared/metrics/cc/stats_registry.cc b/ucm/shared/metrics/cc/stats_registry.cc
new file mode 100644
index 00000000..4a64516b
--- /dev/null
+++ b/ucm/shared/metrics/cc/stats_registry.cc
@@ -0,0 +1,56 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#include "stats_registry.h"
+#include <mutex>
+#include <utility>
+
+namespace UC::Metrics {
+
+// Function-local static: constructed on first use, which keeps registration
+// from other translation units' static initialisers safe.
+StatsRegistry& StatsRegistry::GetInstance() {
+    static StatsRegistry inst;
+    return inst;
+}
+
+// Registers (or replaces) the factory used to build the stats object `name`.
+void StatsRegistry::RegisterStats(std::string name, Creator creator) {
+    auto& reg = GetInstance();
+    std::lock_guard<std::mutex> lk(reg.mutex_);
+    reg.registry_[std::move(name)] = creator;
+}
+
+// Builds a new stats object for `name`, or returns nullptr when no usable
+// factory is registered under that name.
+std::unique_ptr<IStats> StatsRegistry::CreateStats(const std::string& name) {
+    auto& reg = GetInstance();
+    Creator creator = nullptr;
+    {
+        std::lock_guard<std::mutex> lk(reg.mutex_);
+        if (auto it = reg.registry_.find(name); it != reg.registry_.end())
+            creator = it->second;
+    }
+    // Run the factory after releasing the lock: mutex_ is not recursive, so a
+    // factory that touches the registry would otherwise deadlock. The null
+    // check guards against a factory registered as a null function pointer.
+    return creator ? creator() : nullptr;
+}
+
+// Returns the names of all registered factories, in unspecified order.
+std::vector<std::string> StatsRegistry::GetRegisteredStatsNames() {
+    auto& reg = GetInstance();
+    std::lock_guard<std::mutex> lk(reg.mutex_);
+    std::vector<std::string> names;
+    names.reserve(reg.registry_.size());
+    for (auto& [n, _] : reg.registry_) names.push_back(n);
+    return names;
+}
+
+} // namespace UC::Metrics
\ No newline at end of file
diff --git a/ucm/shared/metrics/cc/stats_registry.h b/ucm/shared/metrics/cc/stats_registry.h
new file mode 100644
index 00000000..f7bb32fb
--- /dev/null
+++ b/ucm/shared/metrics/cc/stats_registry.h
@@ -0,0 +1,58 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * */
+#ifndef UNIFIEDCACHE_REGISTRY_H
+#define UNIFIEDCACHE_REGISTRY_H
+
+#include "stats/istats.h"
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace UC::Metrics {
+
+// Factory signature: a plain function (or capture-less lambda) that builds a
+// fresh stats object.
+using Creator = std::unique_ptr<IStats>(*)();
+
+// Process-wide table of stats factories, keyed by stats name.
+class StatsRegistry {
+public:
+    // Returns the single registry instance.
+    static StatsRegistry& GetInstance();
+
+    // Registers (or replaces) the factory for `name`.
+    static void RegisterStats(std::string name, Creator creator);
+
+    // Builds a new stats object for `name`; nullptr when `name` is unknown.
+    std::unique_ptr<IStats> CreateStats(const std::string& name);
+
+    // Names of all registered factories.
+    std::vector<std::string> GetRegisteredStatsNames();
+
+private:
+    // Singleton: constructed only by GetInstance(); not copyable.
+    StatsRegistry() = default;
+    ~StatsRegistry() = default;
+    StatsRegistry(const StatsRegistry&) = delete;
+    StatsRegistry& operator=(const StatsRegistry&) = delete;
+
+    std::mutex mutex_;  // guards registry_
+    std::unordered_map<std::string, Creator> registry_;
+};
+
+}
+
+#endif // UNIFIEDCACHE_REGISTRY_H
\ No newline at end of file
diff --git a/ucm/shared/metrics/cpy/metrics.py.cc b/ucm/shared/metrics/cpy/metrics.py.cc
new file mode 100644
index 00000000..fd0a3aea
--- /dev/null
+++ b/ucm/shared/metrics/cpy/metrics.py.cc
@@ -0,0 +1,49 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include "stats_monitor.h"
+
+namespace py = pybind11;
+namespace UC::Metrics {
+
+// Exposes StatsMonitor to Python as `StatsMonitor`.
+// get_instance() hands out the C++ singleton with the `reference` policy, so
+// Python never takes ownership of it. The std::unordered_map / std::vector
+// arguments and results are converted to dict / list by pybind11/stl.h.
+void bind_monitor(py::module_& m) {
+    py::class_<StatsMonitor>(m, "StatsMonitor")
+        .def_static("get_instance", &StatsMonitor::GetInstance,
+                    py::return_value_policy::reference)
+        .def("update_stats", &StatsMonitor::UpdateStats)
+        .def("reset_all", &StatsMonitor::ResetAllStats)
+        .def("get_stats", &StatsMonitor::GetStats)
+        .def("get_stats_and_clear", &StatsMonitor::GetStatsAndClear);
+}
+
+} // namespace UC::Metrics
+
+// Python extension module `ucmmonitor`: build metadata plus StatsMonitor.
+PYBIND11_MODULE(ucmmonitor, module) {
+    module.attr("project") = UCM_PROJECT_NAME;
+    module.attr("version") = UCM_PROJECT_VERSION;
+    module.attr("commit_id") = UCM_COMMIT_ID;
+    module.attr("build_type") = UCM_BUILD_TYPE;
+    UC::Metrics::bind_monitor(module);
+}
\ No newline at end of file
diff --git a/ucm/shared/metrics/observability.py b/ucm/shared/metrics/observability.py
new file mode 100644
index 00000000..fb33400c
--- /dev/null
+++ b/ucm/shared/metrics/observability.py
@@ -0,0 +1,305 @@
+#
+# MIT License
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+
+
+import os
+import threading
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Union
+
+import prometheus_client
+import yaml
+
+# Third Party
+from prometheus_client import REGISTRY
+from vllm.distributed.parallel_state import get_world_group
+
+from ucm.logger import init_logger
+from ucm.shared.metrics import ucmmonitor
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class UCMEngineMetadata:
+    """Identity of one UCM engine worker; its fields become metric labels."""
+
+    model_name: str
+    worker_id: str
+
+
+class PrometheusLogger:
+    """Publishes UCM stats through Prometheus metrics declared in the config.
+
+    The ``prometheus`` section of the config lists counters, gauges and
+    histograms by name. A stat is exported when its name matches a configured
+    metric; every metric carries the ``model_name`` / ``worker_id`` labels.
+    """
+
+    _gauge_cls = prometheus_client.Gauge
+    _counter_cls = prometheus_client.Counter
+    _histogram_cls = prometheus_client.Histogram
+
+    def __init__(self, metadata: UCMEngineMetadata, config: Dict[str, Any]):
+        """Create the configured metrics.
+
+        Args:
+            metadata: Worker identity used as label values.
+            config: Parsed configuration mapping (not a path).
+        """
+        # `or {}` also covers a section that is present but empty in the YAML.
+        prometheus_config = config.get("prometheus") or {}
+        multiproc_dir = prometheus_config.get("multiproc_dir", "/vllm-workspace")
+        # PROMETHEUS_MULTIPROC_DIR must be set before any metric is registered.
+        # A value already present in the environment wins over the config.
+        if "PROMETHEUS_MULTIPROC_DIR" not in os.environ:
+            os.environ["PROMETHEUS_MULTIPROC_DIR"] = multiproc_dir
+        # Make sure the directory Prometheus will actually use exists.
+        os.makedirs(os.environ["PROMETHEUS_MULTIPROC_DIR"], exist_ok=True)
+
+        self.metadata = metadata
+        self.config = config
+        self.labels = self._metadata_to_labels(metadata)
+        labelnames = list(self.labels.keys())
+
+        self._init_metrics_from_config(labelnames, prometheus_config)
+
+    def _init_metrics_from_config(
+        self, labelnames: List[str], prometheus_config: Dict[str, Any]
+    ):
+        """Create every enabled counter, gauge and histogram from the config."""
+        enabled = prometheus_config.get("enabled_metrics") or {}
+
+        # Prefix prepended to every exported metric name; defaults to "ucm:".
+        metric_prefix = prometheus_config.get("metric_prefix", "ucm:")
+
+        # stat name -> {"type": metric kind, "attr": attribute holding the
+        # metric object}. log_prometheus() uses it to route incoming stats.
+        self.metric_mappings: Dict[str, Dict[str, str]] = {}
+        # Stat names already reported as unconfigured (warn once, not per flush).
+        self._unmapped_stats = set()
+
+        if enabled.get("counters", True):
+            self._register_metrics(
+                "counter",
+                self._counter_cls,
+                prometheus_config.get("counters") or [],
+                labelnames,
+                metric_prefix,
+            )
+
+        if enabled.get("gauges", True):
+            self._register_metrics(
+                "gauge",
+                self._gauge_cls,
+                prometheus_config.get("gauges") or [],
+                labelnames,
+                metric_prefix,
+                extra_kwargs=self._gauge_kwargs,
+            )
+
+        if enabled.get("histograms", True):
+            self._register_metrics(
+                "histogram",
+                self._histogram_cls,
+                prometheus_config.get("histograms") or [],
+                labelnames,
+                metric_prefix,
+                extra_kwargs=self._histogram_kwargs,
+            )
+
+    @staticmethod
+    def _gauge_kwargs(cfg: Dict[str, Any]) -> Dict[str, Any]:
+        """Constructor arguments specific to gauges."""
+        return {"multiprocess_mode": cfg.get("multiprocess_mode", "live")}
+
+    @staticmethod
+    def _histogram_kwargs(cfg: Dict[str, Any]) -> Dict[str, Any]:
+        """Constructor arguments specific to histograms.
+
+        ``buckets`` is passed only when the config provides a non-empty list:
+        prometheus_client rejects an empty bucket list, whereas omitting the
+        argument selects the library's default buckets.
+        """
+        buckets = cfg.get("buckets")
+        return {"buckets": buckets} if buckets else {}
+
+    def _register_metrics(
+        self,
+        metric_type: str,
+        metric_cls: Any,
+        configs: List[Dict[str, Any]],
+        labelnames: List[str],
+        metric_prefix: str,
+        extra_kwargs: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
+    ) -> None:
+        """Create one metric object per config entry and record its mapping.
+
+        The metric object is stored as ``self.<metric_type>_<name>``; an
+        attribute that already exists is reused instead of re-registered.
+        """
+        for cfg in configs:
+            name = cfg.get("name")
+            if not name:
+                logger.warning(f"Skipping {metric_type} entry without a name: {cfg}")
+                continue
+            attr_name = f"{metric_type}_{name}"
+
+            if not hasattr(self, attr_name):
+                kwargs = extra_kwargs(cfg) if extra_kwargs else {}
+                setattr(
+                    self,
+                    attr_name,
+                    metric_cls(
+                        name=f"{metric_prefix}{name}" if metric_prefix else name,
+                        documentation=cfg.get("documentation", ""),
+                        labelnames=labelnames,
+                        **kwargs,
+                    ),
+                )
+            self.metric_mappings[name] = {
+                "type": metric_type,
+                "attr": attr_name,
+            }
+
+    def _log_gauge(self, gauge, data: Union[int, float]) -> None:
+        # Convenience function for logging to gauge.
+        gauge.labels(**self.labels).set(data)
+
+    def _log_counter(self, counter, data: Union[int, float]) -> None:
+        # Convenience function for logging to counter.
+        # Prevent ValueError from negative increment
+        if data < 0:
+            return
+        counter.labels(**self.labels).inc(data)
+
+    def _log_histogram(self, histogram, data: Union[List[int], List[float]]) -> None:
+        # Convenience function for logging to histogram.
+        for value in data:
+            histogram.labels(**self.labels).observe(value)
+
+    @staticmethod
+    def _as_samples(value: Any) -> List[Any]:
+        """Normalise a stat value to a list of samples.
+
+        ``None`` means "no samples"; a list/tuple is taken as-is; any other
+        value is a single sample. A scalar ``0`` is a real sample and is kept.
+        """
+        if value is None:
+            return []
+        if isinstance(value, (list, tuple)):
+            return list(value)
+        return [value]
+
+    def log_prometheus(self, stats: Any):
+        """Export ``stats`` (stat name -> value or list of samples).
+
+        The stats monitor reports every stat as a list of the samples recorded
+        since the previous flush, so each metric kind consumes a list:
+        histograms observe every sample, counters are incremented by every
+        sample, and gauges are set to the most recent sample. Stats without a
+        configured metric are skipped (warned about once).
+        """
+        for stat_name, value in stats.items():
+            metric_mapped = self.metric_mappings.get(stat_name)
+            if metric_mapped is None:
+                if stat_name not in self._unmapped_stats:
+                    self._unmapped_stats.add(stat_name)
+                    logger.warning(f"Stat {stat_name} not initialized.")
+                continue
+            try:
+                metric_obj = getattr(self, metric_mapped["attr"], None)
+                metric_type = metric_mapped["type"]
+                samples = self._as_samples(value)
+
+                if metric_type == "counter":
+                    for sample in samples:
+                        self._log_counter(metric_obj, sample)
+                elif metric_type == "gauge":
+                    if samples:
+                        self._log_gauge(metric_obj, samples[-1])
+                elif metric_type == "histogram":
+                    self._log_histogram(metric_obj, samples)
+            except Exception as e:
+                # One bad metric must not stop the remaining ones.
+                logger.warning(f"Failed to log metric {stat_name}: {e}")
+
+    @staticmethod
+    def _metadata_to_labels(metadata: UCMEngineMetadata):
+        return {
+            "model_name": metadata.model_name,
+            "worker_id": metadata.worker_id,
+        }
+
+    _instance = None
+
+    @staticmethod
+    def GetOrCreate(
+        metadata: UCMEngineMetadata,
+        config_path: Union[Dict[str, Any], str] = "",
+    ) -> "PrometheusLogger":
+        """Return the singleton, creating it on first call.
+
+        Args:
+            metadata: Worker identity for the metric labels.
+            config_path: Despite its (kept for compatibility) name this is the
+                parsed configuration mapping that is handed to the
+                constructor. An empty value selects the built-in defaults.
+
+        Raises:
+            TypeError: If a non-empty, non-mapping value is passed.
+        """
+        if PrometheusLogger._instance is None:
+            config = config_path or {}
+            if not isinstance(config, dict):
+                raise TypeError(
+                    "PrometheusLogger.GetOrCreate expects the parsed config "
+                    f"mapping, got {type(config).__name__}"
+                )
+            PrometheusLogger._instance = PrometheusLogger(metadata, config)
+        # A mismatch is reported rather than asserted so tests that create
+        # several loggers keep running.
+        if PrometheusLogger._instance.metadata != metadata:
+            logger.error(
+                "PrometheusLogger instance already created with "
+                "different metadata. This should not happen except "
+                "in test"
+            )
+        return PrometheusLogger._instance
+
+    @staticmethod
+    def GetInstance() -> "PrometheusLogger":
+        assert (
+            PrometheusLogger._instance is not None
+        ), "PrometheusLogger instance not created yet"
+        return PrometheusLogger._instance
+
+    @staticmethod
+    def GetInstanceOrNone() -> Optional["PrometheusLogger"]:
+        """
+        Returns the singleton instance of PrometheusLogger if it exists,
+        otherwise returns None.
+        """
+        return PrometheusLogger._instance
+
+
+class UCMStatsLogger:
+    """Background thread that drains "ConnStats" into Prometheus periodically."""
+
+    def __init__(self, model_name: str, rank: int, config_path: str = ""):
+        """Load the config and start the flushing thread.
+
+        Args:
+            model_name: Value of the ``model_name`` label.
+            rank: Worker rank; exported as the ``worker_id`` label.
+            config_path: Path of the YAML config file; defaults apply when the
+                file is missing, unreadable or empty.
+        """
+        self.metadata = UCMEngineMetadata(
+            model_name=str(model_name), worker_id=str(rank)
+        )
+        config = self._load_config(config_path)
+        self.log_interval = config.get("log_interval", 10)
+
+        self.monitor = ucmmonitor.StatsMonitor.get_instance()
+        self.prometheus_logger = PrometheusLogger.GetOrCreate(self.metadata, config)
+        self.is_running = True
+        # Lets shutdown() interrupt the wait between two flushes.
+        self._stop_event = threading.Event()
+
+        self.thread = threading.Thread(target=self.log_worker, daemon=True)
+        self.thread.start()
+
+    def _load_config(self, config_path: str) -> Dict[str, Any]:
+        """Load configuration from YAML file; return {} on any problem."""
+        try:
+            with open(config_path, "r") as f:
+                config = yaml.safe_load(f)
+        except FileNotFoundError:
+            logger.warning(f"Config file {config_path} not found, using defaults")
+            return {}
+        except OSError as e:
+            logger.error(f"Cannot read config file {config_path}: {e}")
+            return {}
+        except yaml.YAMLError as e:
+            logger.error(f"Error parsing YAML config file {config_path}: {e}")
+            return {}
+
+        if config is None:
+            logger.warning(f"Config file {config_path} is empty, using defaults")
+            return {}
+        if not isinstance(config, dict):
+            # e.g. a top-level YAML list: callers rely on mapping access.
+            logger.error(
+                f"Config file {config_path} is not a mapping, using defaults"
+            )
+            return {}
+        return config
+
+    def log_worker(self):
+        """Thread body: flush ConnStats every ``log_interval`` seconds."""
+        while self.is_running:
+            try:
+                # Snapshot-and-reset so every sample is exported exactly once.
+                stats = self.monitor.get_stats_and_clear("ConnStats")
+                self.prometheus_logger.log_prometheus(stats)
+            except Exception as e:
+                # Keep the worker alive; a single failed flush is not fatal.
+                logger.warning(f"Failed to flush UCM stats: {e}")
+            # Event.wait instead of time.sleep so shutdown() returns promptly.
+            self._stop_event.wait(self.log_interval)
+
+    def shutdown(self):
+        """Stop the flushing thread and wait for it to exit."""
+        self.is_running = False
+        self._stop_event.set()
+        self.thread.join()
diff --git a/ucm/shared/metrics/test/test.py
b/ucm/shared/metrics/test/test.py
new file mode 100644
index 00000000..246e6f88
--- /dev/null
+++ b/ucm/shared/metrics/test/test.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+#
+# MIT License
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+
+
+import os
+import sys
+
+# This file lives at <repo>/ucm/shared/metrics/test/test.py. The import below
+# is package-qualified (`ucm.shared.metrics`), so the directory that must be on
+# sys.path is the repository root, four levels above this file's directory.
+_HERE = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.abspath(os.path.join(_HERE, *([os.pardir] * 4))))
+from ucm.shared.metrics import ucmmonitor
+
+# One sample per metric; recorded UPDATES times below.
+SAMPLE = {
+    "save_duration": 1.2,
+    "save_speed": 300.5,
+    "load_duration": 0.8,
+    "load_speed": 450.0,
+    "interval_lookup_hit_rates": 0.95,
+}
+UPDATES = 2
+
+mon = ucmmonitor.StatsMonitor.get_instance()
+# Start from a clean slate so the assertions below do not depend on whatever
+# was recorded earlier in this process.
+mon.reset_all()
+for _ in range(UPDATES):
+    mon.update_stats("ConnStats", SAMPLE)
+
+data = mon.get_stats("ConnStats")
+print(data)
+
+# Every update appends one sample per metric, in order.
+for key, value in SAMPLE.items():
+    assert data[key] == [value] * UPDATES, f"{key}: {data[key]}"