Watchdog Subsystem

A watchdog timer is a hardware timer that resets the system if not periodically “kicked” (refreshed). This ensures system recovery from software hangs. The Linux watchdog subsystem provides a unified interface for watchdog hardware.

Watchdog Basics

flowchart TB
    subgraph Normal["Normal Operation"]
        direction LR
        APP1["Application"] -->|"Kick"| WDT1["Watchdog"]
        WDT1 -->|"Reset Timer"| WDT1
    end

    subgraph Hung["System Hung"]
        direction LR
        APP2["Application<br/>(frozen)"] -.->|"No Kick"| WDT2["Watchdog"]
        WDT2 -->|"Timeout!"| RESET["System Reset"]
    end

    style Normal fill:#7a8f73,stroke:#2e7d32
    style Hung fill:#8f7370,stroke:#c62828

Key Concepts

  • Timeout: Time before watchdog triggers reset
  • Kick/Ping: Refresh the watchdog timer
  • Pretimeout: Optional warning before actual timeout
  • Nowayout: Prevent watchdog from being stopped once started

Linux Watchdog Architecture

flowchart TB
    subgraph Userspace
        WDCTL["wdctl"]
        WDAPP["watchdog daemon"]
        CUSTOM["Custom App"]
    end

    subgraph Kernel["Kernel Space"]
        WDCORE["Watchdog Core"]
        WDDEV["watchdog_device"]
        WDOPS["watchdog_ops"]
    end

    subgraph Hardware
        HWWDT["Hardware Watchdog"]
    end

    WDCTL -->|"/dev/watchdog"| WDCORE
    WDAPP -->|"/dev/watchdog"| WDCORE
    CUSTOM -->|"/dev/watchdog"| WDCORE

    WDCORE --> WDDEV
    WDDEV --> WDOPS
    WDOPS --> HWWDT

    style Userspace fill:#738f99,stroke:#0277bd
    style Kernel fill:#7a8f73,stroke:#2e7d32
    style Hardware fill:#8f8a73,stroke:#f9a825

Key Structures

watchdog_device

#include <linux/watchdog.h>

struct watchdog_device {
    int id;                              /* Watchdog ID */
    struct device *parent;               /* Parent device */
    const struct attribute_group **groups; /* sysfs attribute groups */
    const struct watchdog_info *info;    /* Watchdog info */
    const struct watchdog_ops *ops;      /* Watchdog operations */
    const struct watchdog_governor *gov; /* Watchdog governor */
    unsigned int bootstatus;             /* Boot status flags */
    unsigned int timeout;                /* Current timeout (seconds) */
    unsigned int pretimeout;             /* Pretimeout (seconds) */
    unsigned int min_timeout;            /* Minimum timeout */
    unsigned int max_timeout;            /* Maximum timeout */
    unsigned int min_hw_heartbeat_ms;    /* Minimum hw heartbeat */
    unsigned int max_hw_heartbeat_ms;    /* Maximum hw heartbeat */
    struct notifier_block reboot_nb;     /* Reboot notifier */
    struct notifier_block restart_nb;    /* Restart notifier */
    void *driver_data;                   /* Driver private data */
    unsigned long status;                /* Internal status bits */
};

watchdog_info

struct watchdog_info {
    __u32 options;           /* Options the watchdog supports */
    __u32 firmware_version;  /* Firmware version */
    __u8  identity[32];      /* Watchdog identity string */
};

/* Option flags */
#define WDIOF_OVERHEAT      0x0001  /* Reset due to CPU overheat */
#define WDIOF_FANFAULT      0x0002  /* Fan failed */
#define WDIOF_EXTERN1       0x0004  /* External relay 1 */
#define WDIOF_EXTERN2       0x0008  /* External relay 2 */
#define WDIOF_POWERUNDER    0x0010  /* Power bad/undervoltage */
#define WDIOF_CARDRESET     0x0020  /* Card reset */
#define WDIOF_POWEROVER     0x0040  /* Power over limit */
#define WDIOF_SETTIMEOUT    0x0080  /* Can set timeout */
#define WDIOF_MAGICCLOSE    0x0100  /* Magic close character */
#define WDIOF_PRETIMEOUT    0x0200  /* Pretimeout supported */
#define WDIOF_ALARMONLY     0x0400  /* Alarm only, no reboot */
#define WDIOF_KEEPALIVEPING 0x8000  /* Keepalive ping reply */

watchdog_ops

struct watchdog_ops {
    struct module *owner;
    /* Start the watchdog */
    int (*start)(struct watchdog_device *wdd);
    /* Stop the watchdog */
    int (*stop)(struct watchdog_device *wdd);
    /* Ping/kick the watchdog */
    int (*ping)(struct watchdog_device *wdd);
    /* Get remaining time */
    unsigned int (*get_timeleft)(struct watchdog_device *wdd);
    /* Set timeout */
    int (*set_timeout)(struct watchdog_device *wdd, unsigned int timeout);
    /* Set pretimeout */
    int (*set_pretimeout)(struct watchdog_device *wdd, unsigned int timeout);
    /* Restart the system */
    int (*restart)(struct watchdog_device *wdd, unsigned long action,
                   void *data);
    /* ioctl handler */
    long (*ioctl)(struct watchdog_device *wdd, unsigned int cmd,
                  unsigned long arg);
};

Userspace Interface

Device Node

Watchdog is accessed via /dev/watchdog or /dev/watchdog0:

#include <linux/watchdog.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int fd = open("/dev/watchdog", O_RDWR);
if (fd < 0) {
    perror("Failed to open watchdog");
    return -1;
}

/* Start watchdog by opening the device */
/* Kick the watchdog */
write(fd, "k", 1);  /* Any write kicks */

/* Or use ioctl */
int dummy;
ioctl(fd, WDIOC_KEEPALIVE, &dummy);

/* Get timeout */
int timeout;
ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
printf("Timeout: %d seconds\n", timeout);

/* Set timeout */
timeout = 30;
ioctl(fd, WDIOC_SETTIMEOUT, &timeout);

/* Magic close (allows closing without reset) */
write(fd, "V", 1);  /* Magic character */
close(fd);

ioctl Commands

Command Description
WDIOC_GETSUPPORT Get watchdog_info
WDIOC_GETSTATUS Get status
WDIOC_GETBOOTSTATUS Get boot status
WDIOC_KEEPALIVE Kick watchdog
WDIOC_SETTIMEOUT Set timeout
WDIOC_GETTIMEOUT Get timeout
WDIOC_SETPRETIMEOUT Set pretimeout
WDIOC_GETPRETIMEOUT Get pretimeout
WDIOC_GETTIMELEFT Get time left

sysfs Interface

# View watchdog devices
ls /sys/class/watchdog/

# Watchdog info
cat /sys/class/watchdog/watchdog0/identity
cat /sys/class/watchdog/watchdog0/timeout
cat /sys/class/watchdog/watchdog0/timeleft
cat /sys/class/watchdog/watchdog0/state   # active/inactive

# Using wdctl tool
wdctl /dev/watchdog0

Watchdog Daemon

For production systems, use a watchdog daemon:

# Install watchdog daemon
sudo apt-get install watchdog

# Configure /etc/watchdog.conf
watchdog-device = /dev/watchdog
watchdog-timeout = 60
interval = 30

# Enable and start
sudo systemctl enable watchdog
sudo systemctl start watchdog

Device Tree Binding

watchdog@10000000 {
    compatible = "vendor,my-watchdog";
    reg = <0x10000000 0x100>;
    clocks = <&wdt_clk>;
    timeout-sec = <30>;              /* Default timeout */
};

/* With pretimeout interrupt */
watchdog@10001000 {
    compatible = "vendor,my-watchdog";
    reg = <0x10001000 0x100>;
    interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
    timeout-sec = <60>;
};

Status Flags

/* Status bits in watchdog_device.status */
#define WDOG_ACTIVE             0  /* Watchdog is running */
#define WDOG_NO_WAY_OUT         1  /* Nowayout enabled */
#define WDOG_STOP_ON_REBOOT     2  /* Stop on reboot */
#define WDOG_HW_RUNNING         3  /* Hardware is running */
#define WDOG_STOP_ON_UNREGISTER 4  /* Stop on unregister */

/* Helper macros */
static inline bool watchdog_active(struct watchdog_device *wdd)
{
    return test_bit(WDOG_ACTIVE, &wdd->status);
}

static inline bool watchdog_hw_running(struct watchdog_device *wdd)
{
    return test_bit(WDOG_HW_RUNNING, &wdd->status);
}

Nowayout Mode

When nowayout is set, the watchdog cannot be stopped:

static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0444);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started");

/* In probe */
if (nowayout)
    watchdog_set_nowayout(wdd, nowayout);

The kernel config CONFIG_WATCHDOG_NOWAYOUT sets the default.

Pretimeout

Pretimeout provides a warning before the actual timeout:

static irqreturn_t pretimeout_handler(int irq, void *data)
{
    struct my_wdt *wdt = data;

    /* Notify watchdog core */
    watchdog_notify_pretimeout(&wdt->wdd);

    return IRQ_HANDLED;
}

Pretimeout governors can take different actions:

  • noop: Do nothing
  • panic: Trigger kernel panic
  • pretimeout_notifier: Call notifier chain

Summary

  • Watchdog timers ensure system recovery from hangs
  • /dev/watchdog provides userspace interface
  • watchdog_device structure describes hardware capabilities
  • Support for timeout, pretimeout, and nowayout
  • Use watchdog daemon for production systems

Further Reading

Next

Learn how to implement a Watchdog driver.


Back to top

Linux Driver Development Guide is a community resource for learning kernel driver development. Not affiliated with the Linux Foundation. Content provided for educational purposes.

This site uses Just the Docs, a documentation theme for Jekyll.