Per-CPU Variables

Per-CPU variables provide each CPU with its own private copy of a variable. This eliminates the need for locking when accessing the variable, making them ideal for frequently updated counters and statistics.

Why Per-CPU Variables?

flowchart LR
    subgraph Shared["Shared Variable"]
        SV["counter = 100"]
        CPU0a["CPU0: lock, read, inc, write, unlock"]
        CPU1a["CPU1: lock, read, inc, write, unlock"]
        CPU0a --> SV
        CPU1a --> SV
    end

    subgraph PerCPU["Per-CPU Variable"]
        PC0["CPU0: counter = 50"]
        PC1["CPU1: counter = 50"]
        CPU0b["CPU0: inc (no lock!)"]
        CPU1b["CPU1: inc (no lock!)"]
        CPU0b --> PC0
        CPU1b --> PC1
    end

    style PerCPU fill:#7a8f73,stroke:#2e7d32

Benefits:

  • No locking needed for per-CPU access
  • No cache line bouncing between CPUs
  • Excellent scalability
  • Very fast access

Defining Per-CPU Variables

Static Definition

#include <linux/percpu.h>

/* Simple variable: every CPU gets its own int named my_counter */
static DEFINE_PER_CPU(int, my_counter);

/* Structure: one struct my_stats instance per CPU */
static DEFINE_PER_CPU(struct my_stats, cpu_stats);

/* Array: each CPU owns a separate 10-element int array */
static DEFINE_PER_CPU(int[10], my_array);

Dynamic Allocation

/* Per-CPU int allocated at load time; stays NULL until my_init() succeeds. */
int __percpu *dynamic_counter;

/*
 * Init routine: allocate one int per CPU.
 *
 * Returns 0 on success or -ENOMEM if the per-CPU allocation fails.
 */
static int __init my_init(void)
{
    dynamic_counter = alloc_percpu(int);

    return dynamic_counter ? 0 : -ENOMEM;
}

/* Exit routine: release the per-CPU storage obtained in my_init(). */
static void __exit my_exit(void)
{
    free_percpu(dynamic_counter);
}

Accessing Per-CPU Variables

Disable Preemption

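The task must not migrate to another CPU in the middle of a per-CPU access. When you work through a pointer or use the __this_cpu_*() variants, disable preemption around the access; the plain this_cpu_*() operations are preemption-safe on their own: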

/*
 * Option 1: take a pointer to this CPU's copy. Preemption stays off for the
 * whole read-modify-write so the task cannot migrate to another CPU while it
 * still holds the pointer.
 */
preempt_disable();
int *ptr = this_cpu_ptr(&my_counter);
(*ptr)++;
preempt_enable();

/* Option 2: the __ variant leaves preemption protection to the caller */
preempt_disable();
__this_cpu_inc(my_counter);  /* Fast, unsafe without preempt disabled */
preempt_enable();

/*
 * Option 3: the plain this_cpu_*() form is safe against preemption and
 * interrupts by itself, so no explicit preempt_disable() is needed. How that
 * is achieved is architecture-specific; it is not necessarily a
 * preempt_disable()/preempt_enable() pair.
 */
this_cpu_inc(my_counter);

Quick Access Macros

/*
 * Cheat sheet: the lines below are alternatives, not one sequence.
 * this_cpu_*() can be called as-is; the __this_cpu_*() form requires the
 * caller to have preemption disabled already.
 */

/* Read */
int val = this_cpu_read(my_counter);
int val = __this_cpu_read(my_counter);  /* Requires preempt disabled */

/* Write */
this_cpu_write(my_counter, 42);
__this_cpu_write(my_counter, 42);       /* Requires preempt disabled */

/* Increment/Decrement */
this_cpu_inc(my_counter);
this_cpu_dec(my_counter);
__this_cpu_inc(my_counter);             /* Requires preempt disabled */
__this_cpu_dec(my_counter);             /* Requires preempt disabled */

/* Add/Subtract */
this_cpu_add(my_counter, 5);
this_cpu_sub(my_counter, 3);

/* Or operation: set bits (my_flags and FLAG_ACTIVE are assumed to be defined elsewhere) */
this_cpu_or(my_flags, FLAG_ACTIVE);

/* And operation: clear bits by AND-ing with the inverted mask */
this_cpu_and(my_flags, ~FLAG_ACTIVE);

Accessing Other CPUs' Variables

/* Get pointer to specific CPU's variable ('cpu' is the target CPU number) */
int *other_ptr = per_cpu_ptr(&my_counter, cpu);

/* Read from specific CPU */
int val = per_cpu(my_counter, cpu);

/*
 * Iterate all CPUs and add up their copies. Nothing here synchronizes with
 * the owning CPUs, so if they keep updating my_counter the sum is only a
 * snapshot.
 */
int total = 0;
int cpu;

for_each_possible_cpu(cpu) {
    total += per_cpu(my_counter, cpu);
}

Complete Example: Statistics Counter

#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Traffic counters; one instance of this struct is kept per CPU. */
struct my_stats {
    unsigned long packets;  /* incremented once per record_packet() call */
    unsigned long bytes;    /* running sum of the len passed to record_packet() */
    unsigned long errors;   /* incremented when record_packet() gets error == true */
};

/* Per-CPU instance updated by record_packet() and read by stats_show(). */
static DEFINE_PER_CPU(struct my_stats, cpu_stats);

/*
 * Hot-path accounting for one packet (frequent, performance-critical).
 *
 * @len:   size of the packet in bytes, added to the byte counter
 * @error: true if the packet should also be counted as an error
 *
 * Only the executing CPU's copy of cpu_stats is touched, so no lock is taken.
 */
void record_packet(size_t len, bool error)
{
    this_cpu_add(cpu_stats.bytes, len);
    this_cpu_inc(cpu_stats.packets);

    if (!error)
        return;

    this_cpu_inc(cpu_stats.errors);
}

/*
 * seq_file show callback (infrequent, can be slower): print one line per
 * possible CPU followed by a grand total.
 *
 * Each counter is read exactly once per CPU and that single value feeds both
 * the per-CPU line and the total. The owning CPU may be updating its copy
 * while we read it, so reading it twice could make the printed per-CPU lines
 * disagree with the "Total" line.
 *
 * @m: seq_file to print into
 * @v: unused
 *
 * Always returns 0.
 */
static int stats_show(struct seq_file *m, void *v)
{
    unsigned long total_packets = 0;
    unsigned long total_bytes = 0;
    unsigned long total_errors = 0;
    int cpu;

    /* Sum across all CPUs */
    for_each_possible_cpu(cpu) {
        struct my_stats *s = per_cpu_ptr(&cpu_stats, cpu);
        /* READ_ONCE(): one load per counter, the compiler may not re-read */
        unsigned long packets = READ_ONCE(s->packets);
        unsigned long bytes = READ_ONCE(s->bytes);
        unsigned long errors = READ_ONCE(s->errors);

        total_packets += packets;
        total_bytes += bytes;
        total_errors += errors;

        seq_printf(m, "CPU%d: packets=%lu bytes=%lu errors=%lu\n",
                   cpu, packets, bytes, errors);
    }

    seq_printf(m, "Total: packets=%lu bytes=%lu errors=%lu\n",
               total_packets, total_bytes, total_errors);

    return 0;
}

/* Reset statistics */
void reset_stats(void)
{
    int cpu;

    for_each_possible_cpu(cpu) {
        struct my_stats *s = per_cpu_ptr(&cpu_stats, cpu);
        s->packets = 0;
        s->bytes = 0;
        s->errors = 0;
    }
}

Dynamic Per-CPU Allocation

/* Per-CPU payload used by the dynamic allocation example. */
struct my_data {
    int counter;      /* bumped by use_data() on the executing CPU */
    char buffer[64];  /* filled with "CPU<n> data" by my_init() */
};

/* Per-CPU handle; allocated in my_init(), released in my_exit(). */
static struct my_data __percpu *dynamic_data;

/*
 * Init routine: allocate one struct my_data per CPU and give every copy its
 * starting contents.
 *
 * Returns 0 on success, -ENOMEM if alloc_percpu() fails.
 */
static int __init my_init(void)
{
    struct my_data *slot;
    int cpu;

    dynamic_data = alloc_percpu(struct my_data);
    if (dynamic_data == NULL)
        return -ENOMEM;

    /* Visit every possible CPU so each copy is set up before first use */
    for_each_possible_cpu(cpu) {
        slot = per_cpu_ptr(dynamic_data, cpu);
        snprintf(slot->buffer, sizeof(slot->buffer), "CPU%d data", cpu);
        slot->counter = 0;
    }

    return 0;
}

/* Exit routine: free the per-CPU storage that my_init() allocated. */
static void __exit my_exit(void)
{
    free_percpu(dynamic_data);
}

/*
 * Bump the executing CPU's counter.
 *
 * get_cpu_ptr() disables preemption and hands back this CPU's copy;
 * put_cpu_ptr() re-enables preemption once the update is done.
 */
void use_data(void)
{
    struct my_data *local = get_cpu_ptr(dynamic_data);

    local->counter++;
    put_cpu_ptr(dynamic_data);
}

Per-CPU in Interrupt Context

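Per-CPU variables can be used in interrupt context: a handler runs on the CPU that took the interrupt and is not migrated while it runs. If the same variable is also updated outside the handler on that CPU, use the this_cpu_*() operations (or disable interrupts) for those updates so the two cannot interleave: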

/* Number of interrupts handled, kept separately for each CPU. */
static DEFINE_PER_CPU(unsigned long, irq_count);

/*
 * Interrupt handler: counts the interrupt on the executing CPU and reports
 * it as handled.
 *
 * NOTE(review): the __ variant relies on this handler not being preempted or
 * migrated while it runs - confirm that still holds if the handler is ever
 * run as a threaded handler.
 */
irqreturn_t my_irq_handler(int irq, void *dev_id)
{
    /* Safe - interrupts don't migrate */
    __this_cpu_inc(irq_count);  /* Can use __ variant */

    return IRQ_HANDLED;
}

Combining with Other Locks

Sometimes you need both per-CPU data and shared state:

/* Shared (not per-CPU) state: a list guarded by a spinlock. */
struct global_state {
    spinlock_t lock;        /* taken around list updates in process_item() */
    struct list_head list;  /* process_item() adds items here */
};

static DEFINE_PER_CPU(unsigned long, local_counter);
static struct global_state global;

/*
 * Account for @item on the executing CPU and link it into the shared list.
 *
 * The per-CPU counter needs no lock; the list is shared between CPUs and is
 * only modified with global.lock held.
 *
 * NOTE(review): spin_lock() does not disable interrupts. If process_item()
 * can also be reached from interrupt context, the _irqsave variant would be
 * needed - confirm against the call sites.
 */
void process_item(struct item *item)
{
    /* Update local counter (no lock) */
    this_cpu_inc(local_counter);

    /* Update global list (need lock) */
    spin_lock(&global.lock);
    list_add(&item->list, &global.list);
    spin_unlock(&global.lock);
}

CPU Hotplug Considerations

Handle CPUs coming online/offline:

#include <linux/cpu.h>

/*
 * Hotplug "online" callback: start @cpu's counter from zero.
 *
 * Always returns 0.
 */
static int my_cpu_online(unsigned int cpu)
{
    /* Initialize for newly online CPU */
    per_cpu_ptr(dynamic_data, cpu)->counter = 0;

    return 0;
}

/*
 * Hotplug "offline" callback: hand @cpu's current counter value to
 * save_final_count() before the CPU goes away.
 *
 * Always returns 0.
 */
static int my_cpu_offline(unsigned int cpu)
{
    int final_count = per_cpu_ptr(dynamic_data, cpu)->counter;

    /* Save data before CPU goes offline */
    save_final_count(cpu, final_count);

    return 0;
}

static int __init my_init(void)
{
    /* Register for CPU hotplug notifications */
    cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                      "my_driver:online",
                      my_cpu_online,
                      my_cpu_offline);
    return 0;
}

When to Use Per-CPU Variables

Good candidates:

  • Statistics counters
  • Per-CPU caches
  • Frequently updated values
  • Performance-critical paths

Not suitable for:

  • Data that needs cross-CPU synchronization
  • Data that needs atomic cross-CPU reads
  • Small amounts of data (not worth the memory)

Summary

  • Per-CPU variables eliminate locking for CPU-local data
  • Use DEFINE_PER_CPU() for static, alloc_percpu() for dynamic
  • Use this_cpu_* macros for current CPU access
  • Use per_cpu_ptr() to access a specific CPU’s data
  • Disable preemption when accessing (or use safe macros)
  • Aggregate by iterating for_each_possible_cpu()

Next

Learn about coherent DMA for hardware buffer allocation.


Back to top

Linux Driver Development Guide is a community resource for learning kernel driver development. Not affiliated with the Linux Foundation. Content provided for educational purposes.

This site uses Just the Docs, a documentation theme for Jekyll.