#!/usr/bin/env python3
"""
Dark-mode PDF converter (pixel-based, batch-safe).

Rasterizes each page with pdftoppm, applies the hacker theme palette
pixel-by-pixel, then reassembles into a PDF. Processes in batches of 50
pages to stay within memory limits on large documents, then merges with qpdf.

Background: this pixel-level converter replaces the broken
--prefers-color-scheme=dark Chromium flag. Default palette is the hacker
theme (#1f1f31 background, #e0e0e0 text, #5e8bde links).

Usage:
    python scripts/convert.py INPUT.pdf [OUTPUT.pdf]
    python scripts/convert.py INPUT.pdf [OUTPUT.pdf] [--dpi 200]
                              [--bg 1f1f31] [--text e0e0e0] [--link 5e8bde]
                              [--batch-size 50]

Examples:
    python scripts/convert.py export/thgtoa.pdf export/thgtoa-dark.pdf
    python scripts/convert.py export/thgtoa.pdf --dpi 150 --bg 0d1117
"""

from __future__ import annotations

import argparse
import glob
import os
import subprocess
import sys
import tempfile
from pathlib import Path

import numpy as np
from PIL import Image


# --------------------------------------------------------------------------- #
# Defaults (Hacker theme)
# --------------------------------------------------------------------------- #
DEFAULT_BG = (0x1f, 0x1f, 0x31)
DEFAULT_TEXT = (0xe0, 0xe0, 0xe0)
DEFAULT_LINK = (0x5e, 0x8b, 0xde)
DEFAULT_DPI = 200
DEFAULT_BATCH = 50

# Pixels with luma below this fraction of full white count as "ink"
# (text, rules, figures); everything at or above it becomes background.
_INK_THRESHOLD = 0.85

_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')


def hex_to_rgb(h: str) -> tuple[int, int, int]:
    """
    Parse a hex color string into an ``(r, g, b)`` tuple of 0-255 ints.

    Accepts ``RRGGBB`` or the ``RGB`` shorthand, with or without a leading
    ``#`` and surrounding whitespace.

    Raises:
        ValueError: if the string is not 3 or 6 hexadecimal digits.
    """
    digits = h.strip().lstrip('#')
    if len(digits) == 3:
        # CSS-style shorthand: "abc" means "aabbcc".
        digits = ''.join(ch * 2 for ch in digits)
    # Validate explicitly: int(..., 16) alone would accept signs ("+1") and
    # slicing alone would silently ignore trailing characters.
    if len(digits) != 6 or any(ch not in _HEX_DIGITS for ch in digits):
        raise ValueError(
            f"invalid hex color {h!r}: expected 3 or 6 hexadecimal digits"
        )
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))


def apply_dark_theme(
    img: Image.Image,
    bg: tuple[int, int, int] = DEFAULT_BG,
    text: tuple[int, int, int] = DEFAULT_TEXT,
    link: tuple[int, int, int] = DEFAULT_LINK,
) -> Image.Image:
    """
    Remap a white-background page image to a dark theme.

    - Near-white pixels  → bg color
    - Dark pixels (ink/text) → text color
    - Blue-ish pixels    → link color

    Ink pixels are blended between ``bg`` and their target color in
    proportion to their darkness, which keeps anti-aliased glyph edges.

    Args:
        img: Source page image in any Pillow mode (converted to RGB).
        bg: Background color as 0-255 RGB.
        text: Color for dark (ink) pixels as 0-255 RGB.
        link: Color for blue-ish pixels as 0-255 RGB.

    Returns:
        A new RGB image of the same size; ``img`` is not modified.
    """
    rgb = np.asarray(img.convert('RGB'), dtype=np.float32)
    red, green, blue = rgb[..., 0], rgb[..., 1], rgb[..., 2]

    # Rec. 601 luma weights on channels normalized to 0..1.
    lightness = (
        0.299 * (red / 255.0)
        + 0.587 * (green / 255.0)
        + 0.114 * (blue / 255.0)
    )

    is_ink = lightness < _INK_THRESHOLD
    link_mask = (
        is_ink
        & (blue > 100)
        & (blue > red * 1.3)
        & (blue > green * 0.9)
    )

    # 0 for background pixels, rising to 1 for the darkest ink.
    blend = np.clip((1.0 - lightness) / _INK_THRESHOLD, 0.0, 1.0)
    blend = np.where(is_ink, blend, np.float32(0.0))

    bg_arr = np.asarray(bg, dtype=np.float32)
    text_arr = np.asarray(text, dtype=np.float32)
    link_arr = np.asarray(link, dtype=np.float32)

    # Per-pixel target color: link for blue-ish ink, text for all other ink.
    # (Background pixels have blend == 0, so their target is irrelevant.)
    target = np.where(link_mask[..., None], link_arr, text_arr)
    out = bg_arr + blend[..., None] * (target - bg_arr)

    # Round instead of truncating so palette colors are reproduced exactly.
    return Image.fromarray(np.rint(out).clip(0, 255).astype(np.uint8))


def _check_qpdf() -> bool:
    """Return True if a working ``qpdf`` executable is on PATH."""
    try:
        probe = subprocess.run(['qpdf', '--version'], capture_output=True)
    except OSError:
        # subprocess raises (FileNotFoundError/PermissionError) rather than
        # returning a non-zero code when the executable cannot be started.
        return False
    return probe.returncode == 0


def _darken_page(
    path: str,
    bg: tuple[int, int, int],
    text: tuple[int, int, int],
    link: tuple[int, int, int],
) -> Image.Image:
    """Load one rasterized page from ``path`` and return its dark version."""
    with Image.open(path) as src:
        return apply_dark_theme(src, bg, text, link)


def _save_pdf(pages: list[Image.Image], path: str, dpi: int) -> None:
    """Write ``pages`` as a multi-page PDF whose page size matches ``dpi``."""
    first, *rest = pages
    first.save(
        path,
        format='PDF',  # do not depend on the file extension
        save_all=True,
        append_images=rest,
        # Pillow assumes 72 DPI by default, which would inflate the physical
        # page size by dpi/72 relative to the source document.
        resolution=float(dpi),
    )


def convert_pdf_to_dark(
    input_path: str | Path,
    output_path: str | Path,
    dpi: int = DEFAULT_DPI,
    bg: tuple[int, int, int] = DEFAULT_BG,
    text: tuple[int, int, int] = DEFAULT_TEXT,
    link: tuple[int, int, int] = DEFAULT_LINK,
    batch_size: int = DEFAULT_BATCH,
) -> None:
    """
    Full pipeline: rasterize → apply dark theme → reassemble as PDF.

    For large documents, pages are processed in batches of `batch_size` to
    avoid OOM, then merged with qpdf. Falls back to single-pass Pillow save
    if qpdf is not available (fine for small documents).

    Args:
        input_path: Source PDF.
        output_path: Destination PDF; parent directories are created.
        dpi: Rasterization resolution passed to pdftoppm and recorded in
            the output so page dimensions are preserved.
        bg, text, link: Theme colors as 0-255 RGB tuples.
        batch_size: Maximum number of pages held in memory per batch.

    Raises:
        ValueError: if ``batch_size`` is less than 1.
        RuntimeError: if pdftoppm fails or produces no pages.
        OSError: if pdftoppm cannot be started (e.g. not installed).
        subprocess.CalledProcessError: if the qpdf merge fails.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    input_path = str(input_path)
    output_path = str(output_path)

    with tempfile.TemporaryDirectory() as tmp:
        # 1. Rasterize all pages
        prefix = os.path.join(tmp, 'page')
        result = subprocess.run(
            ['pdftoppm', '-r', str(dpi), '-png', input_path, prefix],
            capture_output=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"pdftoppm failed:\n{result.stderr.decode(errors='replace')}"
            )

        # pdftoppm zero-pads page numbers, so a lexical sort is page order.
        pages = sorted(glob.glob(prefix + '-*.png'))
        if not pages:
            raise RuntimeError(
                "pdftoppm produced no output pages — "
                "is the PDF valid and not password-protected?"
            )

        total = len(pages)
        print(f" Converting {total} page(s) at {dpi} DPI…", flush=True)

        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # 2. Process in batches
        use_batches = total > batch_size and _check_qpdf()

        if use_batches:
            batch_dir = os.path.join(tmp, 'batches')
            os.makedirs(batch_dir)
            batch_files = []
            batch_count = (total + batch_size - 1) // batch_size

            for start in range(0, total, batch_size):
                batch = pages[start:start + batch_size]
                batch_num = start // batch_size + 1
                batch_path = os.path.join(
                    batch_dir, f'batch_{batch_num:04d}.pdf'
                )

                print(
                    f" Batch {batch_num}/{batch_count}: "
                    f"pages {start + 1}–{start + len(batch)}",
                    flush=True,
                )

                dark = [_darken_page(p, bg, text, link) for p in batch]
                _save_pdf(dark, batch_path, dpi)
                batch_files.append(batch_path)
                # Drop this batch's images before rasterizing the next one.
                del dark

            # 3. Merge batches with qpdf
            print(" Merging batches…", flush=True)
            subprocess.run(
                ['qpdf', '--empty', '--pages', *batch_files, '--', output_path],
                check=True,
            )

        else:
            # Single-pass for small documents or when qpdf is unavailable
            dark_pages = []
            for i, p in enumerate(pages, 1):
                if i % 50 == 0 or i == 1:
                    print(f" Page {i}/{total}", flush=True)
                dark_pages.append(_darken_page(p, bg, text, link))

            _save_pdf(dark_pages, output_path, dpi)

    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f" Saved → {output_path} ({size_mb:.1f} MB)")


# --------------------------------------------------------------------------- #
# CLI
# --------------------------------------------------------------------------- #
def _hex_color_arg(value: str) -> tuple[int, int, int]:
    """argparse ``type=`` adapter turning a hex string into an RGB tuple."""
    try:
        return hex_to_rgb(value)
    except ValueError as err:
        raise argparse.ArgumentTypeError(str(err)) from err


def _positive_int_arg(value: str) -> int:
    """argparse ``type=`` adapter accepting only integers >= 1."""
    try:
        number = int(value)
    except ValueError:
        number = 0
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"expected a positive integer, got {value!r}"
        )
    return number


def main() -> int:
    """Parse command-line arguments and run the conversion.

    Returns:
        Process exit status: 0 on success, 1 if the conversion failed.
    """
    parser = argparse.ArgumentParser(description='Convert a PDF to dark mode.')
    parser.add_argument('input', help='Input PDF path')
    parser.add_argument('output', nargs='?', help='Output PDF path (optional)')
    parser.add_argument('--dpi', type=_positive_int_arg, default=DEFAULT_DPI,
                        help='Rasterization DPI (default: 200)')
    parser.add_argument('--batch-size', type=_positive_int_arg,
                        default=DEFAULT_BATCH,
                        help='Pages per batch (default: 50)')
    # String defaults are run through ``type`` by argparse, so these end up
    # as RGB tuples just like user-supplied values.
    parser.add_argument('--bg', type=_hex_color_arg, default='1f1f31',
                        help='Background hex color (default: 1f1f31)')
    parser.add_argument('--text', type=_hex_color_arg, default='e0e0e0',
                        help='Body text hex color (default: e0e0e0)')
    parser.add_argument('--link', type=_hex_color_arg, default='5e8bde',
                        help='Link/blue hex color (default: 5e8bde)')
    args = parser.parse_args()

    if not args.output:
        # Default to "<input stem>-dark.pdf" next to the input file.
        base = Path(args.input).stem
        args.output = str(Path(args.input).parent / f"{base}-dark.pdf")

    try:
        convert_pdf_to_dark(
            args.input,
            args.output,
            dpi=args.dpi,
            bg=args.bg,
            text=args.text,
            link=args.link,
            batch_size=args.batch_size,
        )
    except (RuntimeError, OSError, subprocess.CalledProcessError) as err:
        # Expected operational failures: report them without a traceback.
        print(f"error: {err}", file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    raise SystemExit(main())