mirror of
https://github.com/Anon-Planet/thgtoa.git
synced 2026-06-11 00:02:29 +02:00
ci: refactoring some things and removing others
Lots of source additions here from long-standing notes over the past few
months. Squashed to make it neater than 219 commits.
- bump version to v1.2.4, Jun 2026
- expand Tor section with new "Traffic analysis and the limits of Tor" subsection
covering guard node persistence, website fingerprinting, and a practical
breakdown of when Tor is and is not sufficient
- expand hardware/firmware threat section with new subsections on firmware
implants, USB attack hardware (O.MG Cable, Rubber Ducky), Evil Maid attacks,
supply chain compromise, and a physical inspection checklist
- rename "Removing Metadata from Files/Documents/Pictures" section to "Metadata
auditing"; add reference table of tools by file type; expand EXIF/XMP coverage,
PDF metadata (font fingerprinting), and DOCX revision history with real-world
source identification cases; restructure subsections
- add introductory paragraph to "Your Metadata" section
- add new appendix B8: operational security failure case studies with common
threads
- add new appendix B9: post-quantum cryptography covering HNDL threat, NIST PQC
standards, Signal's PQXDH, browser hybrid KEM, PGP limitations, VPN guidance,
and Monero note
- add new appendix C1: stylometric analysis and writing style covering features
measured, deployed tools, real cases (J.K. Rowling), effective and ineffective
countermeasures including AI rewriting
- fix Dangerzone GitHub URL (firstlook -> freedomofpress)
- Remove duplicate footnote [^500]; minor wording fixes ("users" -> "people",
passive voice tweaks, cross-reference updates)
- docs/index.md: put both MSK and RSK GPG fingerprints in a collapsible tip
admonition instead of bare text
- docs/about/index.md: convert Note admonitions to tip; reformat social media
links into collapsible tip block
- docs/mirrors/index.md: simplify PDF download instructions to point to Releases
- README.md: add star history chart
- mkdocs.yml: rename site to "The Hitchhiker's Guide"; update site description
with hashtags
- sign.yml: remove commented-out workflow_run trigger and if: condition; add
verify job that runs after sign, downloads artifacts, runs verify_pdf.py, and
writes a full job summary with hashes; update artifact upload description; minor
comment and whitespace cleanup
- release.yml, changelog.yml: replace decorative banner comments with single-line
comments; fix trailing-space style in permissions block
- publish.yml: remove stale comment about nomaterial theme
- verify_pdf.py: full rewrite: replace single-hash-file lookup with flexible
resolver that checks both bare hash files (.sha256, .b2sum) and two-column
sumfiles (sha256sums.txt, b2sums.txt); add BLAKE2b verification alongside
SHA-256; fix signature extension (.asc not .sig); improve CLI (--file,
--export-dir flags; remove --all; default runs all checks); improve VirusTotal
output with direct link; cleaner output formatting with ruled separators
This commit is contained in:
+199
-153
@@ -1,214 +1,260 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Verification script for PDF files.
|
||||
"""Verification script for thgtoa PDF releases.
|
||||
|
||||
This script verifies:
|
||||
1. SHA256 hash integrity of PDF files
|
||||
2. GPG signature authenticity
|
||||
3. VirusTotal scan status (optional)
|
||||
Verifies SHA-256 hashes, BLAKE2b hashes, and GPG signatures (.asc) for
|
||||
the light and dark PDFs. Optionally checks VirusTotal scan status.
|
||||
|
||||
Usage:
|
||||
python scripts/verify_pdf.py --all # Verify everything
|
||||
python scripts/verify_pdf.py --hashes # Only verify hashes
|
||||
python scripts/verify_pdf.py --signatures # Only verify signatures
|
||||
python scripts/verify_pdf.py --vt # Check VT status (requires API key)
|
||||
|
||||
Examples:
|
||||
python scripts/verify_pdf.py --all
|
||||
python scripts/verify_pdf.py --hashes --file export/thgtoa.pdf
|
||||
python scripts/verify_pdf.py
|
||||
python scripts/verify_pdf.py --hashes
|
||||
python scripts/verify_pdf.py --signatures
|
||||
python scripts/verify_pdf.py --vt
|
||||
python scripts/verify_pdf.py --file export/thgtoa.pdf --hashes
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
def repo_root() -> Path:
    """Return the directory two levels above this script file.

    The script's own path is resolved to an absolute path first, so the
    result does not depend on the current working directory.
    """
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent
|
||||
|
||||
def calculate_sha256(file_path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *file_path*.

    The file is read in 4096-byte blocks so it is never loaded into memory
    in one piece.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
def verify_hash(file_path: Path, expected_hash: str) -> bool:
    """Compare a file's SHA-256 digest with *expected_hash* and print a report.

    Prints a PASS/FAIL line naming the file, followed by the expected and
    the computed digest. The comparison is an exact string match.

    Returns True when the digests are equal, False otherwise.
    """
    computed = calculate_sha256(file_path)
    matches = computed == expected_hash
    if matches:
        verdict = "✓ PASS"
    else:
        verdict = "✗ FAIL"
    print(
        f"{verdict}: {file_path.name}",
        f" Expected: {expected_hash}",
        f" Actual: {computed}",
        sep="\n",
    )
    return matches
|
||||
def _read_bare_hash(hash_file: Path) -> str | None:
|
||||
"""Read a bare hex digest from a single-value hash file."""
|
||||
try:
|
||||
return hash_file.read_text(encoding="utf-8").strip().split()[0]
|
||||
except (OSError, IndexError):
|
||||
return None
|
||||
|
||||
def verify_signature(file_path: Path, sig_file: Path) -> bool:
|
||||
"""Verify GPG signature of a file."""
|
||||
if not sig_file.exists():
|
||||
print(f"✗ FAIL: Signature file not found: {sig_file}")
|
||||
return False
|
||||
|
||||
def _read_hash_from_sumfile(sum_file: Path, pdf_path: Path) -> str | None:
|
||||
"""Read a hash from a two-column sumfile (sha256sum / b2sum format).
|
||||
|
||||
Matches on the filename only (not the full path) so the file can be used
|
||||
regardless of where the PDFs sit on disk.
|
||||
"""
|
||||
if not sum_file.exists():
|
||||
return None
|
||||
target = pdf_path.name
|
||||
try:
|
||||
for line in sum_file.read_text(encoding="utf-8").splitlines():
|
||||
parts = line.strip().split(None, 1)
|
||||
if len(parts) == 2 and Path(parts[1].lstrip("*")).name == target:
|
||||
return parts[0]
|
||||
except OSError:
|
||||
return None
|
||||
return None
|
||||
|
||||
# Hash verification
|
||||
|
||||
def _sha256(path: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with path.open("rb") as fh:
|
||||
for chunk in iter(lambda: fh.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def _blake2b(path: Path) -> str:
|
||||
h = hashlib.blake2b()
|
||||
with path.open("rb") as fh:
|
||||
for chunk in iter(lambda: fh.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def verify_hashes(pdf: Path, export_dir: Path) -> bool:
    """Check a PDF against every expected digest that can be located.

    For each algorithm (SHA-256, then BLAKE2b) the expected digest is taken
    from the per-file hash file ``<pdf name>.sha256`` / ``<pdf name>.b2sum``
    in *export_dir* when that file exists and yields a value, otherwise
    from the shared ``sha256sums.txt`` / ``b2sums.txt`` in the same
    directory. An algorithm for which no expected digest is found is
    reported with a warning and is not counted.

    Returns True only if at least one digest was compared and every
    compared digest matched. Returns False otherwise, including when no
    hash file was found for any algorithm.
    """
    # Full file name, e.g. "thgtoa.pdf" or "thgtoa-dark.pdf".
    pdf_name = pdf.name
    algorithms = (
        ("SHA-256", _sha256, ".sha256", "sha256sums.txt"),
        ("BLAKE2b", _blake2b, ".b2sum", "b2sums.txt"),
    )
    outcomes: list[bool] = []

    for label, digest_fn, bare_suffix, sumfile_name in algorithms:
        bare_path = export_dir / f"{pdf_name}{bare_suffix}"
        sums_path = export_dir / sumfile_name

        # Prefer the per-file hash; fall back to the shared sumfile.
        wanted = None
        if bare_path.exists():
            wanted = _read_bare_hash(bare_path)
        if wanted is None:
            wanted = _read_hash_from_sumfile(sums_path, pdf)
        if wanted is None:
            print(f" ⚠ {label}: no hash file found (checked {bare_path.name}, {sums_path.name})")
            continue

        computed = digest_fn(pdf)
        matched = computed == wanted
        outcomes.append(matched)
        symbol = "✓" if matched else "✗"
        print(f" {symbol} {label}")
        if not matched:
            print(f" expected: {wanted}")
            print(f" actual: {computed}")

    # An empty outcome list (nothing could be checked) counts as failure.
    return bool(outcomes) and all(outcomes)
|
||||
|
||||
# Signature verification
|
||||
|
||||
def verify_signature(pdf: Path) -> bool | None:
|
||||
"""Verify the .asc detached signature for a PDF.
|
||||
|
||||
Returns True on success, False on failure, None if GPG is not installed
|
||||
or the signature file is missing.
|
||||
"""
|
||||
sig = pdf.with_suffix(pdf.suffix + ".asc")
|
||||
if not sig.exists():
|
||||
print(f" ⚠ Signature file not found: {sig.name}")
|
||||
return None
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["gpg", "--verify", str(sig_file), str(file_path)],
|
||||
["gpg", "--verify", str(sig), str(pdf)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
print(f"✓ PASS: {file_path.name} signature verified")
|
||||
# Extract key info from GPG output
|
||||
for line in result.stdout.split('\n'):
|
||||
if 'Good signature' in line or 'key ID' in line.lower():
|
||||
print(f" {line.strip()}")
|
||||
return True
|
||||
else:
|
||||
print(f"✗ FAIL: {file_path.name} signature verification failed")
|
||||
print(f" Error: {result.stderr}")
|
||||
return False
|
||||
|
||||
except FileNotFoundError:
|
||||
print("⚠ WARNING: GPG not installed. Skipping signature verification.")
|
||||
print(" ⚠ GPG not installed — skipping signature verification")
|
||||
return None
|
||||
|
||||
def verify_from_hash_file(file_path: Path, hash_file: Path) -> bool:
|
||||
"""Verify file hash from a hash file."""
|
||||
if not hash_file.exists():
|
||||
print(f"✗ FAIL: Hash file not found: {hash_file}")
|
||||
if result.returncode == 0:
|
||||
print(f" ✓ GPG signature valid")
|
||||
# Surface the key info line from stderr (that's where gpg writes it)
|
||||
for line in result.stderr.splitlines():
|
||||
if any(kw in line for kw in ("Good signature", "key ID", "fingerprint", "using")):
|
||||
print(f" {line.strip()}")
|
||||
return True
|
||||
else:
|
||||
print(f" ✗ GPG signature INVALID")
|
||||
for line in result.stderr.splitlines():
|
||||
if line.strip():
|
||||
print(f" {line.strip()}")
|
||||
return False
|
||||
|
||||
expected_hash = None
|
||||
with open(hash_file, 'r') as f:
|
||||
for line in f:
|
||||
parts = line.strip().split()
|
||||
if len(parts) >= 2 and parts[1] == str(file_path):
|
||||
expected_hash = parts[0]
|
||||
break
|
||||
# VirusTotal
|
||||
|
||||
if not expected_hash:
|
||||
print(f"✗ FAIL: Hash not found in {hash_file.name} for {file_path.name}")
|
||||
return False
|
||||
|
||||
return verify_hash(file_path, expected_hash)
|
||||
|
||||
def check_virustotal(file_hash: str, api_key: str | None = None) -> dict | None:
|
||||
"""Check VirusTotal scan status for a file hash."""
|
||||
if not api_key:
|
||||
print("⚠ WARNING: VT_API_KEY not set. Skipping VirusTotal check.")
|
||||
return None
|
||||
def check_virustotal(pdf: Path, api_key: str) -> bool:
|
||||
"""Query VirusTotal for the SHA-256 of a PDF. Returns True if clean."""
|
||||
file_hash = _sha256(pdf)
|
||||
url = f"https://www.virustotal.com/api/v3/files/{file_hash}"
|
||||
req = urllib.request.Request(url, headers={"x-apikey": api_key})
|
||||
|
||||
try:
|
||||
import urllib.request
|
||||
import json
|
||||
|
||||
url = f"https://www.virustotal.com/api/v3/files/{file_hash}"
|
||||
request = urllib.request.Request(url, headers={"x-apikey": api_key})
|
||||
|
||||
with urllib.request.urlopen(request, timeout=30) as response:
|
||||
data = json.loads(response.read().decode())
|
||||
|
||||
stats = data.get('data', {}).get('attributes', {}).get('last_analysis_stats', {})
|
||||
total = sum(stats.values()) if stats else 0
|
||||
|
||||
print(f"\n🦠 VirusTotal Results for {file_hash[:16]}...")
|
||||
print(f" Total scans: {total}")
|
||||
|
||||
if stats:
|
||||
print(f" Malicious: {stats.get('malicious', 0)}")
|
||||
print(f" Suspicious: {stats.get('suspicious', 0)}")
|
||||
print(f" Undetected: {stats.get('undetected', 0)}")
|
||||
print(f" Clean: {stats.get('harmless', 0)}")
|
||||
|
||||
return data
|
||||
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 404:
|
||||
print(f" ⚠ Not yet scanned on VirusTotal (hash: {file_hash[:16]}…)")
|
||||
else:
|
||||
print(f" ⚠ VirusTotal HTTP error: {e.code}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠ ERROR checking VirusTotal: {e}")
|
||||
return None
|
||||
print(f" ⚠ VirusTotal error: {e}")
|
||||
return False
|
||||
|
||||
stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
|
||||
malicious = stats.get("malicious", 0)
|
||||
suspicious = stats.get("suspicious", 0)
|
||||
undetected = stats.get("undetected", 0)
|
||||
harmless = stats.get("harmless", 0)
|
||||
total = malicious + suspicious + undetected + harmless
|
||||
|
||||
clean = malicious == 0 and suspicious == 0
|
||||
mark = "✓" if clean else "✗"
|
||||
print(f" {mark} VirusTotal ({malicious} malicious, {suspicious} suspicious, "
|
||||
f"{harmless} clean / {total} engines)")
|
||||
print(f" https://www.virustotal.com/gui/file/{file_hash}")
|
||||
return clean
|
||||
|
||||
def main() -> int:
|
||||
root = repo_root()
|
||||
ap = argparse.ArgumentParser(description="Verify PDF files (hashes, signatures, VT).")
|
||||
export = root / "export"
|
||||
|
||||
# File paths
|
||||
ap.add_argument(
|
||||
"--light-pdf",
|
||||
type=Path,
|
||||
default=root / "export" / "thgtoa.pdf",
|
||||
help="Light mode PDF file",
|
||||
ap = argparse.ArgumentParser(
|
||||
description="Verify thgtoa PDF hashes, signatures, and VirusTotal status.",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog=__doc__,
|
||||
)
|
||||
ap.add_argument(
|
||||
"--dark-pdf",
|
||||
"--file",
|
||||
type=Path,
|
||||
default=root / "export" / "thgtoa-dark.pdf",
|
||||
help="Dark mode PDF file",
|
||||
default=None,
|
||||
metavar="PDF",
|
||||
help="Verify a single PDF instead of both light and dark",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--hash-file",
|
||||
"--export-dir",
|
||||
type=Path,
|
||||
default=root / "export" / "thgtoa.pdf.sha256",
|
||||
help="Hash file to verify against",
|
||||
default=export,
|
||||
metavar="DIR",
|
||||
help=f"Directory containing hash and signature files (default: {export})",
|
||||
)
|
||||
|
||||
# Verification modes
|
||||
group = ap.add_mutually_exclusive_group()
|
||||
group.add_argument("--all", action="store_true", help="Verify everything")
|
||||
group.add_argument("--hashes", action="store_true", help="Only verify hashes")
|
||||
group.add_argument("--signatures", action="store_true", help="Only verify signatures")
|
||||
ap.add_argument("--vt", action="store_true", help="Check VirusTotal status")
|
||||
|
||||
ap.add_argument("--hashes", action="store_true", help="Verify hashes only")
|
||||
ap.add_argument("--signatures", action="store_true", help="Verify signatures only")
|
||||
ap.add_argument("--vt", action="store_true", help="Check VirusTotal status")
|
||||
args = ap.parse_args()
|
||||
|
||||
# Determine what to verify
|
||||
if not any([args.all, args.hashes, args.signatures, args.vt]):
|
||||
args.all = True
|
||||
# Default: verify everything
|
||||
do_hashes = args.hashes or not any([args.hashes, args.signatures, args.vt])
|
||||
do_sigs = args.signatures or not any([args.hashes, args.signatures, args.vt])
|
||||
do_vt = args.vt or not any([args.hashes, args.signatures, args.vt])
|
||||
|
||||
all_passed = True
|
||||
# Resolve PDFs to check
|
||||
if args.file:
|
||||
pdfs = [args.file]
|
||||
else:
|
||||
pdfs = [export / "thgtoa.pdf", export / "thgtoa-dark.pdf"]
|
||||
|
||||
pdf_files = [
|
||||
("Light", args.light_pdf),
|
||||
("Dark", args.dark_pdf),
|
||||
]
|
||||
vt_api_key = os.environ.get("VT_API_KEY", "")
|
||||
|
||||
for mode_name, pdf_file in pdf_files:
|
||||
if not pdf_file.exists():
|
||||
print(f"⚠ WARNING: {pdf_file.name} not found. Skipping.")
|
||||
overall_pass = True
|
||||
|
||||
for pdf in pdfs:
|
||||
bar = "─" * 60
|
||||
print(f"\n{bar}")
|
||||
print(f" {pdf.name}")
|
||||
print(bar)
|
||||
|
||||
if not pdf.exists():
|
||||
print(f" ⚠ File not found: {pdf} — skipping")
|
||||
overall_pass = False
|
||||
continue
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Verifying {mode_name} PDF: {pdf_file.name}")
|
||||
print('='*60)
|
||||
if do_hashes:
|
||||
ok = verify_hashes(pdf, args.export_dir)
|
||||
if not ok:
|
||||
overall_pass = False
|
||||
|
||||
# Verify hash if requested
|
||||
if args.all or args.hashes:
|
||||
if not verify_from_hash_file(pdf_file, args.hash_file):
|
||||
all_passed = False
|
||||
if do_sigs:
|
||||
result = verify_signature(pdf)
|
||||
if result is False:
|
||||
overall_pass = False
|
||||
|
||||
# Verify signature if requested
|
||||
if args.all or args.signatures:
|
||||
sig_file = pdf_file.with_suffix(pdf_file.suffix + ".sig")
|
||||
result = verify_signature(pdf_file, sig_file)
|
||||
if result is False: # None means skipped (GPG not installed)
|
||||
all_passed = False
|
||||
if do_vt:
|
||||
if not vt_api_key:
|
||||
print(" ⚠ VT_API_KEY not set — skipping VirusTotal check")
|
||||
else:
|
||||
ok = check_virustotal(pdf, vt_api_key)
|
||||
if not ok:
|
||||
overall_pass = False
|
||||
|
||||
# Check VirusTotal if requested
|
||||
if args.all or args.vt:
|
||||
file_hash = calculate_sha256(pdf_file)
|
||||
api_key = os.environ.get("VT_API_KEY")
|
||||
check_virustotal(file_hash, api_key)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
if all_passed:
|
||||
print("✓ All verifications PASSED")
|
||||
return 0
|
||||
print(f"\n{'─' * 60}")
|
||||
if overall_pass:
|
||||
print(" ✓ All checks passed")
|
||||
else:
|
||||
print("✗ Some verifications FAILED")
|
||||
return 1
|
||||
print(" ✗ One or more checks failed")
|
||||
print()
|
||||
|
||||
return 0 if overall_pass else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user