#!/usr/bin/env python3
"""
tools/refresh-kernel-ranges.py — Detect drift between each module's
kernel_patched_from table and Debian's security-tracker data.

The repo's no-fabrication rule (CVES.md) means every kernel_range
threshold has to come from a real, citeable source. Debian's
security tracker is the most reliable per-CVE backport list — it's
machine-readable and updated continuously by the Debian security
team. Run this periodically (or in CI) to catch new backports that
need to land in the corpus. This script:

  1. Fetches https://security-tracker.debian.org/tracker/data/json
     (cached at /tmp/skeletonkey-debian-tracker.json, 12h TTL).
  2. Scans every modules/*/skeletonkey_modules.c for
     `kernel_patched_from NAME[] = { {M, m, p}, ... };` arrays and
     their corresponding `.cve = "CVE-..."` entry.
  3. For each module, compares the table against Debian's tracked
     fixed-versions for that CVE.
  4. Reports:
       missing branch      — Debian has a fix at X.Y.Z; our table
                             has no X.Y entry. The module's detect()
                             would say VULNERABLE on a Debian host
                             that's actually patched.
       too-tight threshold — Our X.Y.Z is HIGHER than Debian's fix
                             version; our module would call a
                             fixed host vulnerable. False-positive.
       info (more permissive) — Our threshold is LOWER than
                             Debian's; we accept earlier kernels
                             as patched. Could be intentional or
                             could mean we have stale data.

Usage:
    tools/refresh-kernel-ranges.py            # human report
    tools/refresh-kernel-ranges.py --json     # machine-readable
    tools/refresh-kernel-ranges.py --patch    # propose C-source edits
    tools/refresh-kernel-ranges.py --refresh  # force re-fetch

Exit status:
    0  no MISSING / TOO_TIGHT findings (INFO findings are fine)
    1  at least one MISSING or TOO_TIGHT finding
    2  the tool itself failed (tracker unreachable or unparsable,
       unexpected tracker schema, modules directory unreadable)
"""

from __future__ import annotations

import argparse
import json
import os
import re
import sys
import tempfile
import time
import urllib.request
from collections.abc import Iterator

CACHE = "/tmp/skeletonkey-debian-tracker.json"
TRACKER_URL = "https://security-tracker.debian.org/tracker/data/json"
CACHE_TTL_SEC = 12 * 3600

# Severities that require a table edit; INFO is advisory only.
_ACTIONABLE = ("MISSING", "TOO_TIGHT")

# Process exit codes. Drift and tool failure are kept distinct so a CI
# job can tell "tables need updating" from "the check could not run".
_EXIT_OK = 0
_EXIT_DRIFT = 1
_EXIT_ERROR = 2


# ── tracker fetch ────────────────────────────────────────────────────

def _load_cached_tracker() -> dict | None:
    """Return the cached tracker if it is fresh and usable, else None.

    A cache that is missing, older than CACHE_TTL_SEC, owned by another
    user, unreadable, or not a JSON object is treated as a miss rather
    than an error, so the caller falls through to a fresh download.
    """
    try:
        st = os.stat(CACHE)
    except OSError:
        return None
    age = time.time() - st.st_mtime
    if age >= CACHE_TTL_SEC:
        return None
    # CACHE is a fixed name in a shared directory: do not trust a file
    # that some other local user put there.
    if hasattr(os, "getuid") and st.st_uid != os.getuid():
        print(f"[!] ignoring cache {CACHE}: owned by another user",
              file=sys.stderr)
        return None
    try:
        with open(CACHE, encoding="utf-8") as f:
            tracker = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        print(f"[!] ignoring unusable cache {CACHE}: {exc}", file=sys.stderr)
        return None
    if not isinstance(tracker, dict):
        print(f"[!] ignoring cache {CACHE}: not a JSON object",
              file=sys.stderr)
        return None
    print(f"[*] using cached tracker ({CACHE}, age {int(age)}s)",
          file=sys.stderr)
    return tracker


def _store_tracker_cache(data: bytes) -> bool:
    """Best-effort atomic write of *data* to CACHE; True on success.

    The payload is written to a private temp file in the cache
    directory and renamed over CACHE, so an interrupted run never
    leaves a truncated cache and a pre-existing symlink at CACHE is
    replaced rather than followed.
    """
    cache_dir = os.path.dirname(CACHE) or "."
    tmp_path = None
    try:
        os.makedirs(cache_dir, exist_ok=True)
        fd, tmp_path = tempfile.mkstemp(
            prefix=os.path.basename(CACHE) + ".", dir=cache_dir)
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        os.replace(tmp_path, CACHE)
    except OSError as exc:
        print(f"[!] could not write cache {CACHE}: {exc}", file=sys.stderr)
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return False
    return True


def fetch_tracker(force_refresh: bool = False) -> dict:
    """Return the parsed Debian tracker JSON, using the on-disk cache.

    Args:
        force_refresh: skip the cache and always download.

    Returns:
        The tracker document as a dict (top-level keys are looked up
        as package names by debian_fixed_for()).

    Raises:
        OSError: the download failed (urllib.error.URLError is an
            OSError subclass).
        ValueError: the downloaded payload is not a JSON object.
    """
    if not force_refresh:
        cached = _load_cached_tracker()
        if cached is not None:
            return cached

    print(f"[*] fetching {TRACKER_URL} ...", file=sys.stderr)
    req = urllib.request.Request(
        TRACKER_URL,
        headers={"User-Agent": "skeletonkey/refresh-kernel-ranges"},
    )
    with urllib.request.urlopen(req, timeout=120) as r:
        data = r.read()

    # Parse BEFORE caching so a garbage response is never persisted and
    # then re-served for the next 12 hours.
    tracker = json.loads(data)
    if not isinstance(tracker, dict):
        raise ValueError("tracker payload is not a JSON object")

    if _store_tracker_cache(data):
        print(f"[*] tracker cached: {len(data) // 1024} KB", file=sys.stderr)
    return tracker


# ── module source parsing ────────────────────────────────────────────

# Some modules have multiple .cve entries (e.g. dirty_frag_esp +
# dirty_frag_esp6 share the same CVE). Pull the first one.
RE_CVE = re.compile(r'\.cve\s*=\s*"(CVE-\d{4}-\d{4,7})"')
RE_TABLE = re.compile(
    r'kernel_patched_from\s+(\w+)\s*\[\]\s*=\s*\{([^}]+(?:\}[^}]*)*?)\}\s*;',
    re.MULTILINE,
)
RE_ENTRY = re.compile(r'\{\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\}')


def find_modules(repo_root: str) -> Iterator[dict]:
    """Yield {name, src, cve, table, table_name, table_span} per module.

    Only the FIRST `.cve` string and the FIRST kernel_patched_from
    array in each source file are used; directories without a
    skeletonkey_modules.c, a CVE id, or a non-empty table are skipped.

    `table` is a list of (major, minor, patch) tuples in source order.
    `table_span` is the (start, end) character offsets of the array
    body within the decoded source text, for tooling that wants to
    edit the source.

    Raises:
        OSError: `<repo_root>/modules` or a module source is unreadable.
    """
    mods_dir = os.path.join(repo_root, "modules")
    for d in sorted(os.listdir(mods_dir)):
        src = os.path.join(mods_dir, d, "skeletonkey_modules.c")
        if not os.path.isfile(src):
            continue
        # C sources are not guaranteed to be valid UTF-8 (author names
        # in comments, etc.); a stray byte must not abort the scan.
        with open(src, encoding="utf-8", errors="replace") as f:
            text = f.read()
        cve_m = RE_CVE.search(text)
        if not cve_m:
            continue
        tab_m = RE_TABLE.search(text)
        if not tab_m:
            continue
        entries = [tuple(int(x) for x in e)
                   for e in RE_ENTRY.findall(tab_m.group(2))]
        if not entries:
            continue
        yield {
            "name": d,
            "src": src,
            "cve": cve_m.group(1),
            "table": entries,
            "table_name": tab_m.group(1),
            "table_span": (tab_m.start(2), tab_m.end(2)),
        }


# ── Debian tracker lookup ────────────────────────────────────────────

# Debian release names we care about (in age order, oldest first).
# The tracker has more (e.g. ELTS) but those are usually too old to
# inform mainline-or-near-mainline backport thresholds.
DEBIAN_RELEASES = ["bullseye", "bookworm", "trixie", "forky", "sid"]


def parse_upstream_version(deb_ver: str) -> tuple[int, int, int] | None:
    """Map a Debian package version like '5.10.218-1' to upstream
    (5, 10, 218). Returns None on parse failure.

    An `epoch:` prefix is dropped, and everything from the first '-'
    (Debian revision), '+' (backport/rebuild suffix) or '~'
    (pre-release marker) onward is ignored. A two-component version
    ('6.1') gets patch level 0; components beyond the third are
    ignored.
    """
    if not deb_ver or not isinstance(deb_ver, str):
        return None
    # Debian versions are [epoch:]upstream[-revision].
    _, colon, after_epoch = deb_ver.partition(":")
    if colon:
        deb_ver = after_epoch
    head = re.split(r'[-+~]', deb_ver, maxsplit=1)[0]
    parts = head.split(".")
    if len(parts) == 2:
        # Some Debian versions are X.Y (no patch). Treat patch as 0.
        parts.append("0")
    elif len(parts) < 3:
        return None
    try:
        return (int(parts[0]), int(parts[1]), int(parts[2]))
    except ValueError:
        return None


def debian_fixed_for(tracker: dict, cve: str) -> dict[str, tuple[int, int, int]]:
    """For a CVE, return {debian_release: upstream_version_tuple} of
    fixed versions per the tracker.

    Only releases listed in DEBIAN_RELEASES whose status is "resolved"
    and whose fixed_version parses are included. Packages are consulted
    in order ("linux", then "linux-grsec") and the first package that
    supplies a release wins, so the canonical "linux" data is never
    overridden.
    """
    out: dict[str, tuple[int, int, int]] = {}
    for pkg in ("linux", "linux-grsec"):
        cve_data = tracker.get(pkg, {}).get(cve)
        if not cve_data:
            continue
        for release, info in cve_data.get("releases", {}).items():
            if release not in DEBIAN_RELEASES:
                continue
            if info.get("status") != "resolved":
                continue
            up = parse_upstream_version(info.get("fixed_version"))
            if up:
                out.setdefault(release, up)
    return out


# ── compare + report ─────────────────────────────────────────────────

def branch_of(v: tuple[int, int, int]) -> tuple[int, int]:
    """Return the (major, minor) stable branch of a version triple."""
    return (v[0], v[1])


def _fmt_version(v) -> str:
    """Render a (major, minor, patch) triple as 'M.m.p'."""
    return f"{v[0]}.{v[1]}.{v[2]}"


def compare(table: list[tuple[int, int, int]],
            debian: dict[str, tuple[int, int, int]]) -> list[dict]:
    """Return a list of finding dicts ({severity, message, ...}).

    Findings are produced per (major, minor) branch on which Debian
    has a fix, in ascending branch order:

      MISSING   — our table has no entry for that branch
                  (adds key `suggest_add`: [M, m, p]).
      TOO_TIGHT — our patch level is above Debian's earliest fix
                  (adds key `suggest_replace`: [M, m, p]).
      INFO      — our patch level is below Debian's earliest fix.

    Equal thresholds, and table branches Debian has no fix on, yield
    nothing. If the table lists a branch twice, the last entry is the
    one compared.
    """
    # NOTE(review): MISSING is raised for any Debian branch absent from
    # the table, including branches newer than the table's highest
    # entry. If the C-side range check already treats such kernels as
    # patched, those findings are noise — confirm against the
    # kernel_range implementation.
    findings: list[dict] = []
    our_by_branch = {branch_of(t): t for t in table}

    # Group Debian releases by branch (multiple releases may share a branch)
    debian_by_branch: dict[tuple[int, int], list] = {}
    for rel, ver in debian.items():
        debian_by_branch.setdefault(branch_of(ver), []).append((rel, ver))

    # Sorted iteration keeps reports stable regardless of the key order
    # in the tracker JSON.
    for branch in sorted(debian_by_branch):
        rels = debian_by_branch[branch]
        # Use the OLDEST fix Debian has on this branch (most permissive);
        # ties are broken by release name for determinism.
        rels.sort(key=lambda item: (item[1], item[0]))
        oldest_rel, oldest_ver = rels[0]
        oldest = _fmt_version(oldest_ver)
        branch_s = f"{branch[0]}.{branch[1]}"

        our = our_by_branch.get(branch)
        if our is None:
            rel_list = ", ".join(f"{r}: {_fmt_version(v)}" for r, v in rels)
            findings.append({
                "severity": "MISSING",
                "message": (
                    f"Debian has fix on the {branch_s} branch "
                    f"(earliest: {oldest}, "
                    f"all: {rel_list}), but our table has no {branch_s} entry"
                ),
                "suggest_add": list(oldest_ver),
            })
        elif our[2] > oldest_ver[2]:
            findings.append({
                "severity": "TOO_TIGHT",
                "message": (
                    f"Our {_fmt_version(our)} threshold is later than "
                    f"Debian's earliest fix on the {branch_s} branch "
                    f"({oldest}, from "
                    f"{oldest_rel}). Hosts at {branch_s}.{oldest_ver[2]} "
                    "are patched per Debian but our detect() would report "
                    "VULNERABLE."
                ),
                "suggest_replace": list(oldest_ver),
            })
        elif our[2] < oldest_ver[2]:
            # Our threshold is earlier — we're more permissive about
            # what counts as patched. Usually fine (we have better
            # info than Debian's stable backport) but flag as info.
            findings.append({
                "severity": "INFO",
                "message": (
                    f"Our {_fmt_version(our)} threshold is earlier "
                    f"than Debian's {oldest} "
                    f"({oldest_rel}). We're more permissive — verify this "
                    "is intentional (e.g. we tracked a different distro's "
                    "earlier backport)."
                ),
            })

    return findings


# ── main ─────────────────────────────────────────────────────────────

def render_text(reports: list[dict]) -> None:
    """Print the human-readable report.

    Per-module lines go to stdout; only the final
    "N/M module(s) drifted" summary goes to stderr, so stdout can be
    redirected to a file without losing the tally on the terminal.
    """
    tags = {"MISSING": "+", "TOO_TIGHT": "✗", "INFO": "i"}
    drifted = 0
    for r in reports:
        if not r["findings"]:
            print(f"[+] {r['name']:32s} ({r['cve']}) — table is current "
                  f"({len(r['table'])} entries)")
            continue
        drifted += 1
        print(f"[!] {r['name']} ({r['cve']})")
        print("    table:  " + ", ".join(
            f"{M}.{m}.{p}" for (M, m, p) in r["table"]))
        if r["debian"]:
            print("    debian: " + ", ".join(
                f"{rel}={M}.{m}.{p}"
                for rel, (M, m, p) in sorted(r["debian"].items())))
        else:
            print("    debian: (no resolved entries for this CVE)")
        for f in r["findings"]:
            # Unknown severities get a neutral tag instead of a KeyError.
            tag = tags.get(f["severity"], "?")
            print(f"    [{tag}] {f['message']}")
        print()
    total = len(reports)
    print(f"=== {drifted}/{total} module(s) drifted ===", file=sys.stderr)


def render_json(reports: list[dict]) -> None:
    """Print all reports to stdout as one JSON document
    ({"modules": [...]}); non-JSON iterables (tuples are native, sets
    etc. are not) are serialised as lists."""
    print(json.dumps({"modules": reports}, indent=2, default=list))


def render_patch(reports: list[dict]) -> None:
    """Emit a brief proposed-edits diff for modules with MISSING or
    TOO_TIGHT findings. Not actually applied — operator reviews.

    The output is diff-LIKE, not a valid unified diff: for each module
    it lists the proposed table sorted by version (new entries marked
    '+'), followed by the original entries that would be dropped or
    superseded (marked '-').
    """
    for r in reports:
        actionable = [f for f in r["findings"]
                      if f["severity"] in _ACTIONABLE]
        if not actionable:
            continue
        print(f"--- {r['src']}")
        print(f"+++ {r['src']} (proposed)")
        print(f"@@ kernel_patched_from {r['table_name']}[] @@")

        # Reconstruct the table with the actionable changes applied:
        # one entry per branch, suggestions overriding what we have.
        proposed = {branch_of(t): list(t) for t in r["table"]}
        for f in actionable:
            suggestion = f.get("suggest_add") or f.get("suggest_replace")
            if suggestion:
                proposed[branch_of(suggestion)] = list(suggestion)

        old_set = {tuple(t) for t in r["table"]}
        for entry in sorted(proposed.values()):
            marker = " " if tuple(entry) in old_set else "+"
            print(f" {marker} {{{entry[0]:>2}, {entry[1]:>2}, {entry[2]:>3}}},")
        for old in r["table"]:
            # Every original branch is a key of `proposed`, so an entry
            # is obsolete exactly when its branch now maps elsewhere.
            if list(old) != proposed[branch_of(old)]:
                print(f" - {{{old[0]:>2}, {old[1]:>2}, {old[2]:>3}}},")
        print()


def main(argv: list[str] | None = None) -> int:
    """Run the drift check and return the process exit code.

    Args:
        argv: command-line arguments without the program name;
            defaults to sys.argv[1:].

    Returns:
        0 when no module has MISSING/TOO_TIGHT findings, 1 when at
        least one does, 2 when the check itself could not run.
    """
    parser = argparse.ArgumentParser(
        description="Report drift between kernel_patched_from tables "
                    "and Debian's security tracker.",
        allow_abbrev=False,
    )
    parser.add_argument("--json", action="store_true",
                        help="machine-readable report (wins over --patch)")
    parser.add_argument("--patch", action="store_true",
                        help="print proposed C-source table edits")
    parser.add_argument("--refresh", action="store_true",
                        help="ignore the cache and re-fetch the tracker")
    # Unknown arguments were always ignored; keep accepting them so
    # existing invocations don't break, but say so.
    args, unknown = parser.parse_known_args(argv)
    if unknown:
        print(f"[!] ignoring unrecognized argument(s): {' '.join(unknown)}",
              file=sys.stderr)

    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    try:
        tracker = fetch_tracker(force_refresh=args.refresh)
    except (OSError, ValueError) as exc:
        print(f"[-] could not obtain tracker data: {exc}", file=sys.stderr)
        return _EXIT_ERROR

    if "linux" not in tracker:
        print("[-] tracker JSON has no 'linux' package — schema changed?",
              file=sys.stderr)
        return _EXIT_ERROR

    try:
        modules = list(find_modules(repo_root))
    except OSError as exc:
        print(f"[-] could not scan modules under {repo_root}: {exc}",
              file=sys.stderr)
        return _EXIT_ERROR

    reports: list[dict] = []
    for mod in modules:
        debian = debian_fixed_for(tracker, mod["cve"])
        reports.append({
            "name": mod["name"],
            "src": mod["src"],
            "cve": mod["cve"],
            "table_name": mod["table_name"],
            "table": [list(t) for t in mod["table"]],
            "debian": {k: list(v) for k, v in debian.items()},
            "findings": compare(mod["table"], debian),
        })

    if args.json:
        render_json(reports)
    elif args.patch:
        render_patch(reports)
    else:
        render_text(reports)

    # Exit code: 1 if any MISSING or TOO_TIGHT, 0 otherwise. INFO is fine.
    drift = any(f["severity"] in _ACTIONABLE
                for r in reports for f in r["findings"])
    return _EXIT_DRIFT if drift else _EXIT_OK


if __name__ == "__main__":
    sys.exit(main())