Skip to content

I want to add more details on CPU Frequencies #21

@ribalba

Description

@ribalba
#!/usr/bin/env python3
from bcc import BPF
import time
import signal
import json
import sys

# eBPF program:
# Attaches to the power:cpu_frequency tracepoint and accounts the time each CPU
# spends at each frequency.
#
# Data structures:
# - cpu_states: one slot per CPU, indexed by the CPU id carried in the event,
#   holding the timestamp and frequency of the last transition seen for it.
# - freq_usage: hash keyed by (cpu, freq) that accumulates nanoseconds.
#
# On each frequency change:
# - Compute the time elapsed since the previous transition of that CPU
# - Add it to the bucket of the frequency the CPU is leaving
# - Remember the new frequency and the current timestamp
#
# Limitation: the interval before a CPU's first transition and the one after
# its last transition are not accounted, because only transitions are observed.
program = r"""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/ktime.h>

struct freq_key {
    u32 cpu;
    u32 freq;
};

struct cpu_state {
    u64 last_ts;
    u32 last_freq;
};

// Indexed by the CPU id reported in the event. This has to be a plain shared
// array rather than a per-CPU one: the event describing CPU n can be emitted
// while running on another CPU, and per-CPU storage would then split the
// state of CPU n across every CPU that ever reported for it.
BPF_ARRAY(cpu_states, struct cpu_state, 128);  // Assume up to 128 CPUs
BPF_HASH(freq_usage, struct freq_key, u64);

TRACEPOINT_PROBE(power, cpu_frequency) {
    // Use the CPU the event is about, not the CPU the probe happens to run on.
    u32 cpu = args->cpu_id;
    struct cpu_state *state = cpu_states.lookup(&cpu);
    if (!state) {
        // CPU id beyond the size of cpu_states: nothing we can record.
        return 0;
    }

    u64 now = bpf_ktime_get_ns();
    u32 new_freq = args->state;

    // A zeroed slot means no earlier transition was seen for this CPU, so
    // there is no interval to account yet.
    if (state->last_ts != 0 && state->last_freq != 0) {
        // Time spent at the frequency the CPU is now leaving
        u64 delta = now - state->last_ts;

        struct freq_key key = {};
        key.cpu = cpu;
        key.freq = state->last_freq;

        u64 *val = freq_usage.lookup(&key);
        if (!val) {
            // First sample for this (cpu, freq): create the bucket, then
            // look it up again to get a pointer into the map.
            u64 zero = 0;
            freq_usage.update(&key, &zero);
            val = freq_usage.lookup(&key);
        }
        if (val) {
            *val += delta;
        }
    }

    // Update state
    state->last_ts = now;
    state->last_freq = new_freq;

    return 0;
}
"""

# Load BPF
# Compiling the program is enough: bcc attaches TRACEPOINT_PROBE handlers
# automatically when the module is loaded.
b = BPF(text=program)

# CPU states
# No explicit initialisation is performed. BPF array maps are created
# zero-filled by the kernel, and a zeroed slot (last_ts == 0) is exactly the
# "no previous transition" state the probe checks for. Assigning a plain
# Python tuple to a bcc table is also not supported, since the leaf must be a
# ctypes value. The handle is kept so the state can be inspected if needed.
cpu_states = b.get_table("cpu_states")

# Attach tracepoint
# The tracepoint power:cpu_frequency is stable on modern kernels
# If it doesn't exist on your system, you might need another approach.
print("Tracing CPU frequency changes... Press Ctrl-C to end.")

def _read_current_freq_hz(cpu):
    """Return the current frequency of *cpu* in Hz as read from sysfs.

    Returns 0.0 when the cpufreq file is missing, unreadable or does not
    contain a number, so a report can still be produced on such systems.
    """
    path = f"/sys/devices/system/cpu/cpu{cpu}/cpufreq/scaling_cur_freq"
    try:
        with open(path, "r") as freq_file:
            return float(freq_file.read().strip()) * 1000.0  # kHz to Hz
    except (OSError, ValueError):
        return 0.0


def _build_report(samples):
    """Build the JSON-serialisable report from (cpu, freq, ns) samples.

    The result has the shape
    {'cpus': [{'cpu', 'dvfm_states': [{'freq', 'used_ns', 'used_ratio'}, ...],
               'freq_hz', 'idle_ns', 'idle_ratio'}, ...]}
    with CPUs and frequencies in ascending order. 'used_ratio' is the share
    of the CPU's recorded time spent at that frequency.
    """
    usage_by_cpu = {}
    for cpu, freq, ns in samples:
        usage_by_cpu.setdefault(cpu, {})[freq] = ns

    cpus_data = []
    for cpu in sorted(usage_by_cpu):
        freq_usage_ns = usage_by_cpu[cpu]
        total_ns = sum(freq_usage_ns.values())
        dvfm_states = [
            {
                'freq': freq,
                'used_ns': ns,
                # No recorded time at all means every ratio is 0.
                'used_ratio': ns / total_ns if total_ns else 0.0,
            }
            for freq, ns in sorted(freq_usage_ns.items())
        ]
        cpus_data.append({
            'cpu': cpu,
            'dvfm_states': dvfm_states,
            'freq_hz': _read_current_freq_hz(cpu),
            # Idle time is not tracked: only frequency transitions are
            # observed. The fields are kept as placeholders so the output
            # schema stays stable; C-state data would be needed to fill them.
            'idle_ns': 0,
            'idle_ratio': 0.0,
        })

    return {'cpus': cpus_data}


def signal_handler(sig, frame):
    """Print the collected per-CPU frequency usage as JSON and exit.

    Intended as a SIGINT handler; *sig* and *frame* are the standard signal
    handler arguments and are not used. Terminates the process with exit
    status 0 after printing.
    """
    freq_usage = b.get_table("freq_usage")
    samples = ((key.cpu, key.freq, value.value) for key, value in freq_usage.items())
    print(json.dumps(_build_report(samples), indent=4))
    sys.exit(0)

# Register signal_handler for SIGINT (Ctrl-C). It must be installed before
# the wait loop below, which never returns on its own.
signal.signal(signal.SIGINT, signal_handler)

# Run indefinitely until Ctrl-C
# The main thread only sleeps in 5-second steps; nothing else is done here.
while True:
    time.sleep(5)

This is what I have been playing around with. I now only need a way to map this to processes.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions