df4b879527
Standalone Python script that pulls Debian's security-tracker JSON
and compares each module's hardcoded kernel_patched_from table
against the fixed-versions Debian actually ships. Surfaces real
drift the no-fabrication rule needs us to fix:
MISSING — Debian has a fix on a kernel branch we have no entry
for. Module's detect() would say VULNERABLE on a host
that's actually patched.
TOO_TIGHT — Our threshold is later than Debian's earliest fix on
the same branch. Module would call a patched host
VULNERABLE. False-positive on production fleets.
INFO — Our threshold is earlier than Debian's. We're more
permissive; usually fine (we tracked a different
upstream-stable cut), but flagged for review.
Three output modes, plus one cache-control flag:
default (text) — human-readable report on stdout (summary line on stderr)
--json — machine-readable for CI / dashboards
--patch — unified-diff-style proposed C-source edits
--refresh — bypass the 12h cache TTL and re-fetch
Implementation:
- urllib (no pip deps) fetches the ~70MB tracker JSON.
- Cached at /tmp/skeletonkey-debian-tracker.json with 12h TTL.
- Parses every modules/*/skeletonkey_modules.c for the .cve = '...'
field + the kernel_patched_from <name>[] = { {M,m,p}, ... } array.
- Per CVE, builds {debian_release -> upstream_version_tuple} from
the tracker's 'releases.*.fixed_version' field (stripping Debian
-N / +bN / ~bpoN suffixes to recover the upstream version).
- Groups by (major, minor) branch; flags MISSING / TOO_TIGHT / INFO.
- Exits non-zero when MISSING or TOO_TIGHT findings exist (suitable
for a CI 'detect-drift' job).
First-run output found drift in 17 of 20 modules with kernel_range
tables — operator-reviewable. NOT auto-applied; this commit only
ships the diagnostic tool, not the suggested fixes.
README's Contributing section now points at the tool.
347 lines
14 KiB
Python
Executable File
347 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
tools/refresh-kernel-ranges.py — Detect drift between each module's
|
|
kernel_patched_from table and Debian's security-tracker data.
|
|
|
|
The repo's no-fabrication rule (CVES.md) means every kernel_range
|
|
threshold has to come from a real, citeable source. Debian's
|
|
security tracker is the most reliable per-CVE backport list — it's
|
|
machine-readable and updated continuously by the Debian security
|
|
team. This script:
|
|
|
|
1. Fetches https://security-tracker.debian.org/tracker/data/json
|
|
(cached at /tmp/skeletonkey-debian-tracker.json, 12h TTL).
|
|
2. Scans every modules/*/skeletonkey_modules.c for
|
|
`kernel_patched_from <name>[] = { {M, m, p}, ... };` arrays and
|
|
their corresponding `.cve = "CVE-..."` entry.
|
|
3. For each module, compares the table against Debian's tracked
|
|
fixed-versions for that CVE.
|
|
4. Reports:
|
|
missing branch — Debian has a fix at X.Y.Z; our table
|
|
has no X.Y entry. The module's detect()
|
|
would say VULNERABLE on a Debian host
|
|
that's actually patched.
|
|
too-tight threshold — Our X.Y.Z is HIGHER than Debian's fix
|
|
version; our module would call a
|
|
fixed host vulnerable. False-positive.
|
|
info (more permissive) — Our threshold is LOWER than
|
|
Debian's; we accept earlier kernels
|
|
as patched. Could be intentional or
|
|
could mean we have stale data.
|
|
|
|
Usage:
|
|
tools/refresh-kernel-ranges.py # human report
|
|
tools/refresh-kernel-ranges.py --json # machine-readable
|
|
tools/refresh-kernel-ranges.py --patch # propose C-source edits
|
|
tools/refresh-kernel-ranges.py --refresh # force re-fetch
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
import urllib.request
|
|
|
|
# On-disk location of the downloaded tracker JSON (read and written by
# fetch_tracker).
CACHE = "/tmp/skeletonkey-debian-tracker.json"
# Endpoint the tracker JSON is downloaded from.
TRACKER_URL = "https://security-tracker.debian.org/tracker/data/json"
# Maximum age of the cache file, in seconds, before it is re-fetched (12 hours).
CACHE_TTL_SEC = 12 * 3600
|
|
|
|
|
|
# ── tracker fetch ────────────────────────────────────────────────────
|
|
|
|
def fetch_tracker(force_refresh: bool = False) -> dict:
    """Return the parsed Debian tracker JSON, using an on-disk cache.

    The file at ``CACHE`` is used when it exists, is younger than
    ``CACHE_TTL_SEC`` seconds and parses as JSON. Otherwise the tracker
    is downloaded from ``TRACKER_URL`` and the cache file is rewritten.

    Args:
        force_refresh: Skip the cache and always download.

    Returns:
        The decoded top-level JSON value of the tracker.

    Raises:
        OSError: If the download or the cache write fails (includes
            whatever ``urllib.request.urlopen`` raises on network errors).
        ValueError: If the downloaded payload is not valid JSON.
    """
    if not force_refresh and os.path.exists(CACHE):
        age = time.time() - os.stat(CACHE).st_mtime
        if age < CACHE_TTL_SEC:
            try:
                with open(CACHE, encoding="utf-8") as f:
                    cached = json.load(f)
            except (OSError, ValueError) as exc:
                # A truncated or corrupt cache must not break every run
                # until the TTL expires; fall through and re-fetch.
                print(f"[!] ignoring unreadable cache ({CACHE}): {exc}",
                      file=sys.stderr)
            else:
                print(f"[*] using cached tracker ({CACHE}, age {int(age)}s)",
                      file=sys.stderr)
                return cached

    print(f"[*] fetching {TRACKER_URL} ...", file=sys.stderr)
    req = urllib.request.Request(
        TRACKER_URL,
        headers={"User-Agent": "skeletonkey/refresh-kernel-ranges"},
    )
    with urllib.request.urlopen(req, timeout=120) as r:
        data = r.read()

    # Parse BEFORE caching so an invalid payload (e.g. an HTML error page)
    # is never stored and re-served for the next 12 hours.
    tracker = json.loads(data)

    os.makedirs(os.path.dirname(CACHE), exist_ok=True)
    # Write to a sibling temp file and rename it into place so a concurrent
    # or interrupted run never leaves a half-written cache behind.
    tmp_path = f"{CACHE}.{os.getpid()}.tmp"
    with open(tmp_path, "wb") as f:
        f.write(data)
    os.replace(tmp_path, CACHE)
    print(f"[*] tracker cached: {len(data) // 1024} KB", file=sys.stderr)
    return tracker
|
|
|
|
|
|
# ── module source parsing ────────────────────────────────────────────
|
|
|
|
# Some modules have multiple .cve entries (e.g. dirty_frag_esp +
# dirty_frag_esp6 share the same CVE). Pull the first one.
# Group 1 is the CVE identifier inside the quotes.
RE_CVE = re.compile(r'\.cve\s*=\s*"(CVE-\d{4}-\d{4,7})"')
# Matches `kernel_patched_from <name>[] = { ... };`.
# Group 1 is the array name; group 2 is the text between the outer braces,
# ending at the first `}` that is followed (after optional whitespace) by `;`.
# NOTE(review): the pattern has no ^ or $ anchors, so re.MULTILINE does not
# change what it matches.
RE_TABLE = re.compile(
    r'kernel_patched_from\s+(\w+)\s*\[\]\s*=\s*\{([^}]+(?:\}[^}]*)*?)\}\s*;',
    re.MULTILINE,
)
# One `{M, m, p}` initializer: three decimal integers, captured as groups 1-3.
RE_ENTRY = re.compile(r'\{\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\}')
|
|
|
|
|
|
def find_modules(repo_root: str):
    """Yield one description dict per module that has a CVE and a range table.

    Scans ``<repo_root>/modules/*/skeletonkey_modules.c`` in sorted
    directory order. A module is skipped when that file does not exist,
    or when no ``.cve`` field, no ``kernel_patched_from`` array, or no
    ``{M, m, p}`` entry is found in it. Only the first ``.cve`` match and
    the first table in each file are used.

    Args:
        repo_root: Repository root containing the ``modules`` directory.

    Yields:
        dict with keys ``name`` (directory name), ``src`` (path to the C
        file), ``cve``, ``table`` (list of (major, minor, patch) tuples in
        source order), ``table_name`` and ``table_span``. ``table_span``
        is the (start, end) character offsets of the array body within
        the decoded file text, for --patch mode that wants to edit the
        source.

    Raises:
        OSError: If the ``modules`` directory cannot be listed.
    """
    mods_dir = os.path.join(repo_root, "modules")
    for d in sorted(os.listdir(mods_dir)):
        src = os.path.join(mods_dir, d, "skeletonkey_modules.c")
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # locale (a C/POSIX locale would otherwise choke on non-ASCII
            # comment text); undecodable bytes are replaced, not fatal.
            with open(src, encoding="utf-8", errors="replace") as f:
                text = f.read()
        except (FileNotFoundError, NotADirectoryError):
            # No module source here (or `d` is a plain file): not a module.
            continue
        cve_m = RE_CVE.search(text)
        if not cve_m:
            continue
        tab_m = RE_TABLE.search(text)
        if not tab_m:
            continue
        entries = [
            tuple(int(x) for x in e)
            for e in RE_ENTRY.findall(tab_m.group(2))
        ]
        if not entries:
            continue
        yield {
            "name": d,
            "src": src,
            "cve": cve_m.group(1),
            "table": entries,
            "table_name": tab_m.group(1),
            "table_span": (tab_m.start(2), tab_m.end(2)),
        }
|
|
|
|
|
|
# ── Debian tracker lookup ────────────────────────────────────────────
|
|
|
|
# Debian release names we care about (in age order, oldest first).
# The tracker has more (e.g. ELTS) but those are usually too old to
# inform mainline-or-near-mainline backport thresholds.
# debian_fixed_for() ignores any release not listed here.
DEBIAN_RELEASES = ["bullseye", "bookworm", "trixie", "forky", "sid"]
|
|
|
|
|
|
def parse_upstream_version(deb_ver: str) -> tuple[int, int, int] | None:
    """Map a Debian package version to its upstream (major, minor, patch).

    Examples: ``'5.10.218-1'`` -> ``(5, 10, 218)``,
    ``'6.1.90-1~bpo11+1'`` -> ``(6, 1, 90)``, ``'1:5.10.218-1'`` ->
    ``(5, 10, 218)``. A two-component version such as ``'6.7'`` gets a
    patch level of 0. Components after the third are ignored.

    Args:
        deb_ver: Debian version string; may be empty or None.

    Returns:
        The version triple, or None when the input is empty, has fewer
        than two dotted components, or a component is not an integer
        (e.g. the placeholder ``'0'``).
    """
    if not deb_ver:
        return None
    # Debian versions are [epoch:]upstream[-revision]; drop a numeric epoch
    # so it does not make the first component unparseable.
    epoch, colon, remainder = deb_ver.partition(":")
    if colon and epoch.isdigit():
        deb_ver = remainder
    # Keep only the text before the first '-' (Debian revision), '+'
    # (rebuild / backport suffix) or '~' (pre-release / bpo suffix).
    head = re.split(r'[-+~]', deb_ver, maxsplit=1)[0]
    parts = head.split(".")
    if len(parts) < 2:
        return None
    if len(parts) == 2:
        # Some Debian versions are X.Y (no patch). Treat patch as 0.
        parts.append("0")
    try:
        return (int(parts[0]), int(parts[1]), int(parts[2]))
    except ValueError:
        return None
|
|
|
|
|
|
def debian_fixed_for(tracker: dict, cve: str) -> dict[str, tuple[int, int, int]]:
    """Collect Debian's fixed upstream version per release for one CVE.

    Looks the CVE up under the ``linux`` and then the ``linux-grsec``
    package. Only releases listed in ``DEBIAN_RELEASES`` whose status is
    ``"resolved"`` and whose ``fixed_version`` parses are kept; if both
    packages report the same release, the later package's value replaces
    the earlier one.

    Returns:
        {debian_release: (major, minor, patch)}; empty when nothing matches.
    """
    fixed_by_release: dict[str, tuple[int, int, int]] = {}
    for package in ("linux", "linux-grsec"):
        try:
            record = tracker.get(package, {})[cve]
        except KeyError:
            # This package has no entry for the CVE.
            continue
        per_release = record.get("releases", {})
        for release, info in per_release.items():
            if release in DEBIAN_RELEASES and info.get("status") == "resolved":
                version = parse_upstream_version(info.get("fixed_version"))
                if version:
                    fixed_by_release[release] = version
    return fixed_by_release
|
|
|
|
|
|
# ── compare + report ─────────────────────────────────────────────────
|
|
|
|
def branch_of(v: tuple[int, int, int]) -> tuple[int, int]:
    """Return the (major, minor) branch of a version triple as a tuple."""
    major = v[0]
    minor = v[1]
    return major, minor
|
|
|
|
|
|
def compare(table: list[tuple[int, int, int]],
            debian: dict[str, tuple[int, int, int]]) -> list[dict]:
    """Diff our per-branch thresholds against Debian's fixed versions.

    Debian's fixes are grouped by (major, minor) branch and the lowest
    version on each branch is compared with our entry for that branch.

    Returns:
        A list of finding dicts, each with ``severity`` and ``message``:
        ``MISSING`` (we have no entry for the branch; also carries
        ``suggest_add``), ``TOO_TIGHT`` (our patch level is higher than
        Debian's; also carries ``suggest_replace``) or ``INFO`` (our patch
        level is lower). Branches where both agree produce no finding.
    """

    def dotted(ver) -> str:
        return f"{ver[0]}.{ver[1]}.{ver[2]}"

    # Index our thresholds by branch; a later duplicate branch wins.
    ours_by_branch = {}
    for threshold in table:
        ours_by_branch[branch_of(threshold)] = threshold

    # Bucket Debian's fixes by branch (several releases may share one),
    # keeping branches in first-seen order.
    buckets: dict[tuple[int, int], list] = {}
    for release, version in debian.items():
        key = branch_of(version)
        if key not in buckets:
            buckets[key] = []
        buckets[key].append((release, version))

    results: list[dict] = []
    for branch, members in buckets.items():
        # The lowest fixed version on the branch is the most permissive bar.
        ordered = sorted(members, key=lambda pair: pair[1])
        first_release, first_fix = ordered[0]
        branch_label = f"{branch[0]}.{branch[1]}"
        everything = ", ".join(f"{rel}: {dotted(ver)}" for rel, ver in ordered)

        if branch not in ours_by_branch:
            results.append({
                "severity": "MISSING",
                "message": (
                    f"Debian has fix on the {branch_label} branch "
                    f"(earliest: {dotted(first_fix)}, all: {everything}), "
                    f"but our table has no {branch_label} entry"
                ),
                "suggest_add": list(first_fix),
            })
            continue

        mine = ours_by_branch[branch]
        if mine[2] > first_fix[2]:
            results.append({
                "severity": "TOO_TIGHT",
                "message": (
                    f"Our {dotted(mine)} threshold is later than "
                    f"Debian's earliest fix on the {branch_label} branch "
                    f"({dotted(first_fix)}, from {first_release}). "
                    f"Hosts at {branch_label}.{first_fix[2]} "
                    "are patched per Debian but our detect() would report "
                    "VULNERABLE."
                ),
                "suggest_replace": list(first_fix),
            })
        elif mine[2] < first_fix[2]:
            # We accept earlier kernels as patched than Debian does;
            # informational only.
            results.append({
                "severity": "INFO",
                "message": (
                    f"Our {dotted(mine)} threshold is earlier "
                    f"than Debian's {dotted(first_fix)} "
                    f"({first_release}). We're more permissive — verify this "
                    "is intentional (e.g. we tracked a different distro's "
                    "earlier backport)."
                ),
            })

    return results
|
|
|
|
|
|
# ── main ─────────────────────────────────────────────────────────────
|
|
|
|
def render_text(reports: list[dict]) -> None:
    """Print the human-readable drift report.

    Per-module lines are printed to stdout; the closing
    "N/M module(s) drifted" summary is printed to stderr.
    """
    severity_marks = {"MISSING": "+", "TOO_TIGHT": "✗", "INFO": "i"}
    drift_count = 0
    for report in reports:
        findings = report["findings"]
        if findings:
            drift_count += 1
            print(f"[!] {report['name']} ({report['cve']})")
            ours = ", ".join(f"{M}.{m}.{p}" for M, m, p in report["table"])
            print(" table: " + ours)
            fixed = report["debian"]
            if fixed:
                theirs = ", ".join(
                    f"{rel}={M}.{m}.{p}"
                    for rel, (M, m, p) in sorted(fixed.items())
                )
                print(" debian: " + theirs)
            else:
                print(" debian: (no resolved entries for this CVE)")
            for finding in findings:
                mark = severity_marks[finding["severity"]]
                print(f" [{mark}] {finding['message']}")
            print()
        else:
            # Nothing to flag: one summary line for this module.
            print(f"[+] {report['name']:32s} ({report['cve']}) — table is current "
                  f"({len(report['table'])} entries)")
    print(f"=== {drift_count}/{len(reports)} module(s) drifted ===",
          file=sys.stderr)
|
|
|
|
|
|
def render_json(reports: list[dict]) -> None:
    """Print the reports to stdout as JSON under a top-level "modules" key.

    Output is indented by two spaces; values json cannot encode natively
    are converted with ``list()``.
    """
    payload = {"modules": reports}
    text = json.dumps(payload, indent=2, default=list)
    print(text)
|
|
|
|
|
|
def render_patch(reports: list[dict]) -> None:
    """Print diff-style proposed table edits for drifted modules.

    Only modules with MISSING or TOO_TIGHT findings are shown. For each
    one the proposed table is printed in sorted order (new entries
    prefixed with "+"), followed by the old entries it would replace
    (prefixed with "-"). Nothing is written to the source files.
    """

    def row(prefix: str, entry) -> str:
        return f"{prefix}{{{entry[0]:>2}, {entry[1]:>2}, {entry[2]:>3}}},"

    for report in reports:
        wanted = [finding for finding in report["findings"]
                  if finding["severity"] in ("MISSING", "TOO_TIGHT")]
        if not wanted:
            continue

        src = report["src"]
        print(f"--- {src}")
        print(f"+++ {src} (proposed)")
        print(f"@@ kernel_patched_from {report['table_name']}[] @@")

        current = report["table"]
        # Start from the current table, then overlay each suggestion on
        # its branch (an add and a replace are applied the same way).
        proposed = {branch_of(entry): list(entry) for entry in current}
        for finding in wanted:
            for key in ("suggest_add", "suggest_replace"):
                if key in finding:
                    suggestion = list(tuple(finding[key]))
                    proposed[branch_of(suggestion)] = suggestion
                    break

        existing = {tuple(entry) for entry in current}
        for entry in sorted(proposed.values()):
            prefix = " " if tuple(entry) in existing else " + "
            print(row(prefix, entry))

        # Old entries whose branch now carries a different value.
        for old in current:
            if proposed.get(branch_of(old)) != list(old):
                print(row(" - ", old))
        print()
|
|
|
|
|
|
def main() -> int:
    """Run the drift check and return the process exit status.

    Recognised flags (looked up anywhere in ``sys.argv``): ``--json``,
    ``--patch`` and ``--refresh``. ``--json`` takes precedence over
    ``--patch``; with neither, the text report is rendered.

    Returns:
        1 if the tracker has no ``linux`` package or any module has a
        MISSING or TOO_TIGHT finding, otherwise 0. INFO findings alone
        do not fail the run.
    """
    argv = sys.argv
    want_json = "--json" in argv
    want_patch = "--patch" in argv
    want_refresh = "--refresh" in argv

    # This script lives one directory below the repository root.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(script_dir)
    tracker = fetch_tracker(force_refresh=want_refresh)

    if "linux" not in tracker:
        print("[-] tracker JSON has no 'linux' package — schema changed?",
              file=sys.stderr)
        return 1

    reports: list[dict] = []
    for mod in find_modules(repo_root):
        fixed = debian_fixed_for(tracker, mod["cve"])
        reports.append({
            "name": mod["name"],
            "src": mod["src"],
            "cve": mod["cve"],
            "table_name": mod["table_name"],
            "table": [list(entry) for entry in mod["table"]],
            "debian": {release: list(ver) for release, ver in fixed.items()},
            "findings": compare(mod["table"], fixed),
        })

    if want_json:
        renderer = render_json
    elif want_patch:
        renderer = render_patch
    else:
        renderer = render_text
    renderer(reports)

    # Exit code: 1 if any MISSING or TOO_TIGHT, 0 otherwise. INFO is fine.
    has_actionable = any(
        finding["severity"] in ("MISSING", "TOO_TIGHT")
        for report in reports
        for finding in report["findings"]
    )
    return 1 if has_actionable else 0
|
|
|
|
|
|
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|