Files
thgtoa/scripts/verify_pdf.py
T
nopeitsnothing c5e5ae48e1 ci: refactoring some things and removing others
Lots of source additions here from long-standing notes over the past few
months. Squashed to make it neater than 219 commits.

- bump version to v1.2.4, Jun 2026
- expand Tor section with new "Traffic analysis and the limits of Tor" subsection
  covering guard node persistence, website fingerprinting, and a practical
  breakdown of when Tor is and is not sufficient
- expand hardware/firmware threat section with new subsections on firmware
  implants, USB attack hardware (O.MG Cable, Rubber Ducky), Evil Maid attacks,
  supply chain compromise, and a physical inspection checklist
- rename "Removing Metadata from Files/Documents/Pictures" section to "Metadata
  auditing"; add reference table of tools by file type; expand EXIF/XMP coverage,
  PDF metadata (font fingerprinting), and DOCX revision history with real-world
  source identification cases; restructure subsections
- add introductory paragraph to "Your Metadata" section
- add new appendix B8: operational security failure case studies with common
  threads
- add new appendix B9: post-quantum cryptography covering HNDL threat, NIST PQC
  standards, Signal's PQXDH, browser hybrid KEM, PGP limitations, VPN guidance,
  and Monero note
- add new appendix C1: stylometric analysis and writing style covering features
  measured, deployed tools, real cases (J.K. Rowling), effective and ineffective
  countermeasures including AI rewriting
- fix Dangerzone GitHub URL (firstlook -> freedomofpress)
- remove duplicate footnote [^500]; minor wording fixes ("users" -> "people",
  passive voice tweaks, cross-reference updates)

- docs/index.md: present both MSK and RSK GPG fingerprints in a collapsible tip
  admonition instead of bare text
- docs/about/index.md: convert Note admonitions to tip; reformat social media
  links into collapsible tip block
- docs/mirrors/index.md: simplify PDF download instructions to point to Releases
- README.md: add star history chart
- mkdocs.yml: rename site to "The Hitchhiker's Guide"; update site description
  with hashtags

- sign.yml: remove commented-out workflow_run trigger and if: condition; add
  verify job that runs after sign, downloads artifacts, runs verify_pdf.py, and
  writes a full job summary with hashes; update artifact upload description; minor
  comment and whitespace cleanup
- release.yml, changelog.yml: replace decorative banner comments with single-line
  comments; fix trailing-space style in permissions block
- publish.yml: remove stale comment about nomaterial theme
- verify_pdf.py: full rewrite: replace single-hash-file lookup with flexible
  resolver that checks both bare hash files (.sha256, .b2sum) and two-column
  sumfiles (sha256sums.txt, b2sums.txt); add BLAKE2b verification alongside
  SHA-256; fix signature extension (.asc not .sig); improve CLI (--file,
  --export-dir flags; remove --all; default runs all checks); improve VirusTotal
  output with direct link; cleaner output formatting with ruled separators
2026-05-30 09:32:16 -04:00

261 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""Verification script for thgtoa PDF releases.
Verifies SHA-256 hashes, BLAKE2b hashes, and GPG signatures (.asc) for
the light and dark PDFs. Optionally checks VirusTotal scan status.
Usage:
python scripts/verify_pdf.py
python scripts/verify_pdf.py --hashes
python scripts/verify_pdf.py --signatures
python scripts/verify_pdf.py --vt
python scripts/verify_pdf.py --file export/thgtoa.pdf --hashes
"""
from __future__ import annotations
import argparse
import hashlib
import json
import os
import subprocess
import sys
import urllib.request
from pathlib import Path
def repo_root() -> Path:
    """Return the absolute directory two levels above this file.

    The file's path is resolved first, then its containing directory is
    taken, and finally that directory's parent is returned.
    """
    script_path = Path(__file__).resolve()
    containing_dir = script_path.parent
    return containing_dir.parent
def _read_bare_hash(hash_file: Path) -> str | None:
    """Read a bare hex digest from a single-value hash file.

    The first whitespace-delimited token of the file is returned, so both a
    file holding only a digest and one holding ``<digest>  <filename>`` are
    accepted.

    Args:
        hash_file: Path of the hash file to read (decoded as UTF-8).

    Returns:
        The first token of the file, or ``None`` when the file cannot be
        read, is not valid UTF-8, or contains no tokens at all.
    """
    try:
        tokens = hash_file.read_text(encoding="utf-8").split()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # corrupt or binary hash file would abort the script with a traceback
        # instead of being treated as "no hash available".
        return None
    return tokens[0] if tokens else None
def _read_hash_from_sumfile(sum_file: Path, pdf_path: Path) -> str | None:
    """Read a hash from a two-column sumfile (sha256sum / b2sum format).

    Each line is expected to look like ``<digest>  <path>`` or
    ``<digest> *<path>``.  Matching is done on the file name only (not the
    full path) so the sumfile can be used regardless of where the PDFs sit
    on disk.

    Args:
        sum_file: Path of the sumfile to scan (decoded as UTF-8).
        pdf_path: PDF whose digest is wanted; only ``pdf_path.name`` is used.

    Returns:
        The digest column of the first matching line, or ``None`` when the
        sumfile is missing, unreadable, not valid UTF-8, or has no entry for
        the PDF.
    """
    wanted = pdf_path.name
    try:
        text = sum_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # A missing file raises FileNotFoundError (an OSError), so no separate
        # exists() check is needed.  UnicodeDecodeError covers a corrupt or
        # binary sumfile, which would otherwise crash the script.
        return None

    for line in text.splitlines():
        columns = line.strip().split(None, 1)
        if len(columns) != 2:
            continue
        digest, listed = columns
        # Leading "*" marks binary mode in sha256sum/b2sum output.
        if Path(listed.lstrip("*")).name == wanted:
            return digest
    return None
# Hash verification
def _sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is opened in binary mode and fed to the hasher in 65536-byte
    reads, so it is never loaded into memory in one piece.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
def _blake2b(path: Path) -> str:
    """Return the hex BLAKE2b digest of the file at *path*.

    Uses ``hashlib.blake2b()`` with its default parameters and reads the
    file in binary mode, 65536 bytes at a time.
    """
    digest = hashlib.blake2b()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
def verify_hashes(pdf: Path, export_dir: Path) -> bool:
    """Verify a PDF against every available SHA-256 / BLAKE2b hash file.

    For each algorithm the expected digest is taken from the bare hash file
    in *export_dir* (``<pdf name>.sha256`` / ``<pdf name>.b2sum``) when that
    file exists and yields a value, otherwise from the two-column sumfile
    (``sha256sums.txt`` / ``b2sums.txt``).  An algorithm with no expected
    digest is reported and skipped.

    Args:
        pdf: The PDF file to hash.
        export_dir: Directory searched for the hash files and sumfiles.

    Returns:
        True only if at least one digest was checked and every checked digest
        matched.  False if any digest mismatched or no hash file was found.
    """
    pdf_name = pdf.name  # full file name, e.g. "thgtoa.pdf" or "thgtoa-dark.pdf"
    checks = [
        ("SHA-256", _sha256, export_dir / f"{pdf_name}.sha256", export_dir / "sha256sums.txt"),
        ("BLAKE2b", _blake2b, export_dir / f"{pdf_name}.b2sum", export_dir / "b2sums.txt"),
    ]
    results: list[bool] = []
    for algo, digest_fn, bare_file, sum_file in checks:
        # Resolve expected hash — prefer bare file, fall back to sumfile
        expected = _read_bare_hash(bare_file) if bare_file.exists() else None
        if expected is None:
            expected = _read_hash_from_sumfile(sum_file, pdf)
        if expected is None:
            print(f"{algo}: no hash file found (checked {bare_file.name}, {sum_file.name})")
            continue

        actual = digest_fn(pdf)
        # hexdigest() is always lowercase; hash files written by other tools
        # may use uppercase hex, which is the same digest.
        ok = actual == expected.lower()
        results.append(ok)
        # Distinct marks so a pass and a failure are distinguishable in the
        # output (same glyphs the signature check uses).
        mark = "✓" if ok else "✗"
        print(f" {mark} {algo}")
        if not ok:
            print(f" expected: {expected}")
            print(f" actual: {actual}")

    # An empty result list means nothing was verified, which is not a pass.
    return bool(results) and all(results)
# Signature verification
def verify_signature(pdf: Path) -> bool | None:
    """Verify the .asc detached signature for a PDF.

    The signature is expected next to the PDF, named ``<pdf name>.asc``, and
    is checked by running ``gpg --verify <sig> <pdf>``.

    Args:
        pdf: The PDF whose detached signature should be verified.

    Returns:
        True on success, False on failure, None if GPG is not installed
        or the signature file is missing.
    """
    sig = pdf.with_suffix(pdf.suffix + ".asc")
    if not sig.exists():
        print(f" ⚠ Signature file not found: {sig.name}")
        return None

    try:
        result = subprocess.run(
            ["gpg", "--verify", str(sig), str(pdf)],
            capture_output=True,
            text=True,
            # gpg output can contain bytes that are not valid in the locale
            # encoding; replace them rather than crash while decoding.
            errors="replace",
            check=False,
        )
    except FileNotFoundError:
        print(" ⚠ GPG not installed — skipping signature verification")
        return None

    # gpg writes its human-readable report to stderr.
    report = [line.strip() for line in result.stderr.splitlines() if line.strip()]

    if result.returncode == 0:
        print(" ✓ GPG signature valid")
        # Surface only the lines that identify the signing key.
        for line in report:
            if any(kw in line for kw in ("Good signature", "key ID", "fingerprint", "using")):
                print(f" {line}")
        return True

    # Per the gpg man page, exit status 1 means a signature was bad; other
    # non-zero codes are fatal errors (for example a missing public key), so
    # those are not reported as an invalid signature.
    if result.returncode == 1:
        print(" ✗ GPG signature INVALID")
    else:
        print(f" ✗ GPG could not verify signature (exit code {result.returncode})")
    for line in report:
        print(f" {line}")
    return False
# VirusTotal
def check_virustotal(pdf: Path, api_key: str) -> bool:
    """Query VirusTotal for the SHA-256 of a PDF. Returns True if clean.

    The file itself is never uploaded: only its SHA-256 is looked up through
    the ``/api/v3/files/<hash>`` endpoint.

    Args:
        pdf: The PDF to look up.
        api_key: VirusTotal API key, sent in the ``x-apikey`` header.

    Returns:
        True when the report counts at least one engine verdict and none of
        them is malicious or suspicious.  False when any engine flags the
        file, when the hash is unknown to VirusTotal, when the report has no
        verdicts, or when the request fails.
    """
    # Imported by name here because the file only imports urllib.request;
    # urllib.error being reachable through it is a side effect.
    from urllib.error import HTTPError

    file_hash = _sha256(pdf)
    url = f"https://www.virustotal.com/api/v3/files/{file_hash}"
    req = urllib.request.Request(url, headers={"x-apikey": api_key})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            data = json.loads(resp.read().decode())
    except HTTPError as e:
        if e.code == 404:
            print(f" ⚠ Not yet scanned on VirusTotal (hash: {file_hash[:16]}…)")
        else:
            print(f" ⚠ VirusTotal HTTP error: {e.code}")
        return False
    except Exception as e:
        # Deliberately broad: this is a best-effort network check and any
        # transport or decoding failure should be reported, not raised.
        print(f" ⚠ VirusTotal error: {e}")
        return False

    stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
    malicious = stats.get("malicious", 0)
    suspicious = stats.get("suspicious", 0)
    undetected = stats.get("undetected", 0)
    harmless = stats.get("harmless", 0)
    total = malicious + suspicious + undetected + harmless

    if total == 0:
        # No engine returned a verdict: "0 malicious of 0 engines" is not
        # evidence that the file is clean, so do not report it as such.
        print(f" ⚠ VirusTotal returned no analysis results (hash: {file_hash[:16]}…)")
        print(f" https://www.virustotal.com/gui/file/{file_hash}")
        return False

    clean = malicious == 0 and suspicious == 0
    # Distinct marks so a clean and a flagged result are distinguishable.
    mark = "✓" if clean else "✗"
    print(f" {mark} VirusTotal ({malicious} malicious, {suspicious} suspicious, "
          f"{harmless} clean / {total} engines)")
    print(f" https://www.virustotal.com/gui/file/{file_hash}")
    return clean
def main() -> int:
    """Parse the command line, run the selected checks, and report the result.

    With none of ``--hashes``, ``--signatures`` or ``--vt`` given, all three
    checks run.  The VirusTotal check is skipped with a warning when the
    ``VT_API_KEY`` environment variable is unset or empty.

    Returns:
        0 if every executed check passed, 1 if a PDF was missing or any check
        failed.  A signature check that could not run (no gpg, no ``.asc``
        file) does not count as a failure.
    """
    root = repo_root()
    export = root / "export"
    ap = argparse.ArgumentParser(
        description="Verify thgtoa PDF hashes, signatures, and VirusTotal status.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    ap.add_argument(
        "--file",
        type=Path,
        default=None,
        metavar="PDF",
        help="Verify a single PDF instead of both light and dark",
    )
    ap.add_argument(
        "--export-dir",
        type=Path,
        default=export,
        metavar="DIR",
        help=f"Directory containing hash and signature files (default: {export})",
    )
    ap.add_argument("--hashes", action="store_true", help="Verify hashes only")
    ap.add_argument("--signatures", action="store_true", help="Verify signatures only")
    ap.add_argument("--vt", action="store_true", help="Check VirusTotal status")
    args = ap.parse_args()

    # Default: verify everything when no check was selected explicitly
    run_all = not (args.hashes or args.signatures or args.vt)
    do_hashes = args.hashes or run_all
    do_sigs = args.signatures or run_all
    do_vt = args.vt or run_all

    # Resolve PDFs to check
    if args.file is not None:
        pdfs = [args.file]
    else:
        # Honour --export-dir here too: signatures are looked up next to the
        # PDF, so the default PDFs must come from the directory the user
        # pointed at.  Without the flag this is still <repo>/export.
        pdfs = [args.export_dir / "thgtoa.pdf", args.export_dir / "thgtoa-dark.pdf"]

    vt_api_key = os.environ.get("VT_API_KEY", "")
    bar = "─" * 60  # ruled separator; an empty string here printed nothing
    overall_pass = True

    for pdf in pdfs:
        print(f"\n{bar}")
        print(f" {pdf.name}")
        print(bar)

        if not pdf.exists():
            print(f" ⚠ File not found: {pdf} — skipping")
            overall_pass = False
            continue

        if do_hashes and not verify_hashes(pdf, args.export_dir):
            overall_pass = False

        # None means "could not be checked" and is intentionally not a failure.
        if do_sigs and verify_signature(pdf) is False:
            overall_pass = False

        if do_vt:
            if not vt_api_key:
                print(" ⚠ VT_API_KEY not set — skipping VirusTotal check")
            elif not check_virustotal(pdf, vt_api_key):
                overall_pass = False

    print(f"\n{bar}")
    if overall_pass:
        print(" ✓ All checks passed")
    else:
        print(" ✗ One or more checks failed")
    print()
    return 0 if overall_pass else 1
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())