#!/usr/bin/env python3
"""
tools/refresh-verifications.py — read docs/VERIFICATIONS.jsonl, generate
core/verifications.c with a deduped, sorted lookup table.

Dedup key: (module, vm_box, host_kernel). expect_detect is deliberately
NOT part of the key (see dedup_latest). On collision, the LATEST
verified_at wins (so re-runs update rather than accumulate). Records are
then sorted by (module, vm_box, host_kernel) so the output is stable and
review-friendly.

Records with no module name are dropped silently. Records with
status != "match" are kept so MISMATCH histories stay visible in
--module-info (but don't earn the ✓ verified badge).

Usage:
    tools/refresh-verifications.py           # regenerate core/verifications.c
    tools/refresh-verifications.py --check   # exit 1 if regenerating would change anything
"""

import argparse
import json
import sys
from pathlib import Path

REPO = Path(__file__).resolve().parent.parent
JSONL = REPO / "docs" / "VERIFICATIONS.jsonl"
OUT_C = REPO / "core" / "verifications.c"

# Characters that need a mnemonic escape inside a C string literal.
_C_ESCAPES = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
}


def load_records(path=None):
    """Read verification records from a JSONL file.

    Args:
        path: file to read; defaults to the module-level JSONL path.

    Returns:
        A list of dicts, one per usable line, in file order. Blank lines
        and lines starting with "#" are ignored. Lines that are not valid
        JSON, or that decode to something other than a JSON object, are
        skipped with a warning on stderr. Objects without a truthy
        "module" value are dropped silently. A missing file yields [].
    """
    source = JSONL if path is None else Path(path)
    try:
        # Explicit encoding: do not depend on the locale of whoever runs this.
        raw = source.read_text(encoding="utf-8")
    except FileNotFoundError:
        return []

    records = []
    for lineno, line in enumerate(raw.splitlines(), start=1):
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        try:
            rec = json.loads(line)
        except json.JSONDecodeError as e:
            # json reports positions relative to this one line, so include
            # the real line number of the file in the message.
            print(f"[!] skipping bad JSONL line {lineno}: {e}", file=sys.stderr)
            continue
        if not isinstance(rec, dict):
            # Valid JSON but not an object (e.g. a list or a number): it
            # has no fields to read, so it cannot be a record.
            print(f"[!] skipping non-object JSONL line {lineno}",
                  file=sys.stderr)
            continue
        if rec.get("module"):
            records.append(rec)
    return records


def _text(record, field):
    """Return record[field] as a string, mapping a missing/null value to ""."""
    value = record.get(field)
    return "" if value is None else str(value)


def dedup_latest(records):
    """Keep only the latest record per (module, vm_box, host_kernel).

    NB: expect_detect is intentionally NOT part of the dedup key. If we
    re-verify the same target with a corrected expectation, the new record
    supersedes the old one entirely (the old MISMATCH was a stale
    target-yaml entry, not a separate test scenario).

    "Latest" is decided by comparing the verified_at strings; on a tie the
    record seen first is kept. Missing or null fields compare as "".

    Returns:
        The surviving records sorted by (module, vm_box, host_kernel).
    """
    latest = {}
    for rec in records:
        key = (rec.get("module"), rec.get("vm_box"), rec.get("host_kernel"))
        kept = latest.get(key)
        # String comparison — presumably every verified_at uses the same
        # ISO-8601 layout so that lexical order is chronological; confirm
        # against the writer of the JSONL file.
        if kept is None or _text(rec, "verified_at") > _text(kept, "verified_at"):
            latest[key] = rec
    # _text() keeps the sort from raising TypeError when a field is an
    # explicit JSON null (None cannot be ordered against str).
    return sorted(
        latest.values(),
        key=lambda rec: (
            _text(rec, "module"),
            _text(rec, "vm_box"),
            _text(rec, "host_kernel"),
        ),
    )


def date_only(iso_ts: str) -> str:
    """Truncate 2026-05-23T19:26:02Z -> 2026-05-23.

    An empty or None timestamp yields "".
    """
    if not iso_ts:
        return ""
    day, _, _ = iso_ts.partition("T")
    return day


def _c_escape_char(ch):
    """Return the spelling of one character inside a C string literal."""
    mapped = _C_ESCAPES.get(ch)
    if mapped is not None:
        return mapped
    code = ord(ch)
    if code < 0x20 or code == 0x7F:
        # Always three octal digits, so a following digit in the data can
        # never be absorbed into the escape sequence.
        return f"\\{code:03o}"
    return ch


def cstr(s):
    """Render a string as a double-quoted C string literal.

    None and "" both become '""'. Backslash and double quote are escaped;
    control characters (newline, tab, ...) are escaped as well so that a
    stray one in the data cannot break the generated literal. All other
    characters are passed through unchanged.
    """
    if s is None or s == "":
        return '""'
    return '"' + "".join(_c_escape_char(ch) for ch in s) + '"'


def render_c(records) -> str:
    """Render the full text of core/verifications.c for the given records.

    Args:
        records: iterable of record dicts. The emitted C helper
            verifications_module_has_match() walks `count` entries starting
            at the first match, so all records of one module must be
            adjacent; dedup_latest() returns them in that order.

    Returns:
        The C source as one string ending in a newline.
    """
    # NOTE(review): the banner below still says the generator dedupes on
    # "(module, vm_box, kernel, expect)", while dedup_latest() keys on
    # (module, vm_box, host_kernel) only. The emitted text is left as is so
    # the committed file does not drift; update both together.
    lines = [
        "/*",
        " * SKELETONKEY — verification records table",
        " *",
        " * AUTO-GENERATED by tools/refresh-verifications.py from",
        " * docs/VERIFICATIONS.jsonl. Do not hand-edit; rerun the script.",
        " *",
        " * Source: tools/verify-vm/verify.sh appends one JSON record per",
        " * run; this generator dedupes to (module, vm_box, kernel, expect)",
        " * and keeps the latest by verified_at.",
        " */",
        "",
        '#include "verifications.h"',
        "",
        # bool/true/false, size_t/NULL and strcmp used by the code below.
        "#include <stdbool.h>",
        "#include <stddef.h>",
        "#include <string.h>",
        "",
        "const struct verification_record verifications[] = {",
    ]
    for rec in records:
        lines += [
            " {",
            f" .module = {cstr(rec.get('module'))},",
            f" .verified_at = {cstr(date_only(rec.get('verified_at', '')))},",
            f" .host_kernel = {cstr(rec.get('host_kernel'))},",
            f" .host_distro = {cstr(rec.get('host_distro'))},",
            f" .vm_box = {cstr(rec.get('vm_box'))},",
            f" .expect_detect = {cstr(rec.get('expect_detect'))},",
            f" .actual_detect = {cstr(rec.get('actual_detect'))},",
            f" .status = {cstr(rec.get('status'))},",
            " },",
        ]
    lines += [
        "};",
        "",
        "const size_t verifications_count =",
        " sizeof(verifications) / sizeof(verifications[0]);",
        "",
        "const struct verification_record *",
        "verifications_for_module(const char *module, size_t *count_out)",
        "{",
        " if (count_out) *count_out = 0;",
        " if (!module) return NULL;",
        " const struct verification_record *first = NULL;",
        " size_t n = 0;",
        " for (size_t i = 0; i < verifications_count; i++) {",
        " if (strcmp(verifications[i].module, module) == 0) {",
        " if (first == NULL) first = &verifications[i];",
        " n++;",
        " }",
        " }",
        " if (count_out) *count_out = n;",
        " return first;",
        "}",
        "",
        "bool verifications_module_has_match(const char *module)",
        "{",
        " size_t n = 0;",
        " const struct verification_record *r = verifications_for_module(module, &n);",
        " for (size_t i = 0; i < n; i++)",
        " if (r[i].status && strcmp(r[i].status, \"match\") == 0)",
        " return true;",
        " return false;",
        "}",
        "",  # trailing element so the joined text ends with a newline
    ]
    return "\n".join(lines)


def _doc_summary():
    """Return the first paragraph of the module docstring on one line.

    Returns None when the docstring is unavailable (e.g. under -OO).
    """
    doc = (__doc__ or "").strip()
    if not doc:
        return None
    first_paragraph = doc.split("\n\n", 1)[0]
    return " ".join(first_paragraph.split())


def main() -> int:
    """Regenerate core/verifications.c, or with --check only detect drift.

    Returns:
        The process exit status: 0 on success or when --check finds the
        file current, 1 when --check finds the file out of date.
    """
    ap = argparse.ArgumentParser(description=_doc_summary())
    ap.add_argument("--check", action="store_true",
                    help="diff against committed core/verifications.c; exit 1 on drift")
    args = ap.parse_args()

    records = dedup_latest(load_records())
    text = render_c(records)

    if args.check:
        # A missing output file counts as drift (compared against "").
        existing = OUT_C.read_text(encoding="utf-8") if OUT_C.exists() else ""
        if existing == text:
            print(f"[+] core/verifications.c is current ({len(records)} record(s))",
                  file=sys.stderr)
            return 0
        print("[!] core/verifications.c drifted — rerun "
              "tools/refresh-verifications.py", file=sys.stderr)
        return 1

    # The generated text contains non-ASCII characters; write it as UTF-8
    # regardless of the locale so every machine produces the same bytes.
    OUT_C.write_text(text, encoding="utf-8")
    print(f"[+] wrote {OUT_C.relative_to(REPO)} ({len(records)} record(s))",
          file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())