1/8 feat(pdf): add pixel-based dark mode PDF converter

Replaces the broken --prefers-color-scheme=dark Chromium flag with a
pixel-level converter. Rasterizes pages via pdftoppm, remaps colors to
the hacker theme (#1f1f31 bg, #e0e0e0 text, #5e8bde links), and
reassembles with qpdf. Processes in batches of 50 pages to avoid OOM
on large documents like the 414-page guide.

Signed-off-by: nopeitsnothing <no@anonymousplanet.org>
This commit is contained in:
nopeitsnothing
2026-05-22 16:24:03 -04:00
parent 3184181fa8
commit 68b2687b6b
+226
View File
@@ -0,0 +1,226 @@
#!/usr/bin/env python3
"""
Dark-mode PDF converter (pixel-based, batch-safe).
Rasterizes each page with pdftoppm, applies the hacker theme palette
pixel-by-pixel, then reassembles into a PDF. Processes in batches of 50
pages to stay within memory limits on large documents, then merges with qpdf.
Usage:
python scripts/convert.py INPUT.pdf [OUTPUT.pdf]
python scripts/convert.py INPUT.pdf [OUTPUT.pdf] [--dpi 200]
[--bg 1f1f31] [--text e0e0e0] [--link 5e8bde]
[--batch-size 50]
Examples:
python scripts/convert.py export/thgtoa.pdf export/thgtoa-dark.pdf
python scripts/convert.py export/thgtoa.pdf --dpi 150 --bg 0d1117
"""
from __future__ import annotations
import argparse
import glob
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import numpy as np
from PIL import Image
# --------------------------------------------------------------------------- #
# Defaults (Hacker theme)
# --------------------------------------------------------------------------- #
# Theme palette as (r, g, b) tuples with components in 0-255.
DEFAULT_BG = (0x1f, 0x1f, 0x31)  # page background, #1f1f31
DEFAULT_TEXT = (0xe0, 0xe0, 0xe0)  # body text / ink, #e0e0e0
DEFAULT_LINK = (0x5e, 0x8b, 0xde)  # blue-ish (link) pixels, #5e8bde
# Rasterization resolution handed to `pdftoppm -r`.
DEFAULT_DPI = 200
# Default number of pages per batch in convert_pdf_to_dark().
DEFAULT_BATCH = 50
def hex_to_rgb(h: str) -> tuple:
    """
    Parse a hex color string into an ``(r, g, b)`` tuple of ints in 0-255.

    Accepts six hex digits (``RRGGBB``) or the three-digit shorthand
    (``RGB``, each digit doubled), with or without leading ``#`` characters
    and surrounding whitespace. Case is ignored.

    Raises:
        ValueError: if, after stripping, the string is not exactly 3 or 6
            hexadecimal digits.
    """
    digits = h.strip().lstrip('#')
    if len(digits) == 3:
        # Expand shorthand: "0af" -> "00aaff".
        digits = ''.join(ch * 2 for ch in digits)
    # Check digits explicitly: int(x, 16) alone would also accept signs,
    # whitespace and similar, and slicing would silently drop extra digits.
    is_hex = all(ch in '0123456789abcdefABCDEF' for ch in digits)
    if len(digits) != 6 or not is_hex:
        raise ValueError(
            f"invalid hex color {h!r}: expected 3 or 6 hexadecimal digits"
        )
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
def apply_dark_theme(
    img: Image.Image,
    bg=DEFAULT_BG,
    text=DEFAULT_TEXT,
    link=DEFAULT_LINK,
) -> Image.Image:
    """
    Remap a white-background page image to a dark theme.

    Each pixel is classified by its luma (0.299 R + 0.587 G + 0.114 B,
    scaled to 0-1):

    - luma >= 0.85 (near-white paper)    → ``bg``
    - darker and blue-dominant (links)   → blended from ``bg`` toward ``link``
    - any other darker pixel (ink/text)  → blended from ``bg`` toward ``text``

    The blend weight is ``(1 - luma) / 0.85`` clipped to [0, 1], so the
    darkest pixels receive the full target color and lighter (for example
    anti-aliased) pixels land proportionally closer to ``bg``.

    Args:
        img: Page image; it is converted to RGB before processing.
        bg: Background color as an ``(r, g, b)`` tuple, components 0-255.
        text: Ink/text color, same format.
        link: Link color, same format.

    Returns:
        A new RGB image with the same dimensions as ``img``.
    """
    rgb = np.asarray(img.convert('RGB'), dtype=np.float32)
    red, green, blue = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    lightness = (0.299 * red + 0.587 * green + 0.114 * blue) / 255.0

    # Anything darker than near-white paper counts as page content.
    is_ink = lightness < 0.85
    link_mask = (
        is_ink
        & (blue > 100)
        & (blue > red * 1.3)
        & (blue > green * 0.9)
    )
    # Weight 0 keeps the pure background; weight 1 is the full target color.
    weight = np.where(
        is_ink,
        ((1.0 - lightness) / 0.85).clip(0, 1),
        np.float32(0.0),
    )

    out = np.zeros(rgb.shape, dtype=np.uint8)
    for i, (bg_c, text_c, link_c) in enumerate(zip(bg, text, link)):
        target = np.where(link_mask, np.float32(link_c), np.float32(text_c))
        # Blend directly in 0-255 space and round to nearest: truncating
        # after a /255 ... *255 float32 round trip can land one level below
        # the requested palette value.
        channel = bg_c + weight * (target - bg_c)
        out[:, :, i] = np.rint(channel).clip(0, 255).astype(np.uint8)
    return Image.fromarray(out)
def _check_qpdf() -> bool:
    """
    Report whether a working ``qpdf`` executable is available.

    Returns True only when ``qpdf --version`` can be launched and exits with
    status 0. If the executable cannot be started at all, ``subprocess.run``
    raises ``OSError`` (``FileNotFoundError`` when it is not on PATH); that
    is reported as False so callers can fall back instead of crashing.
    """
    try:
        probe = subprocess.run(['qpdf', '--version'], capture_output=True)
    except OSError:
        return False
    return probe.returncode == 0
def _load_dark_page(path, bg, text, link):
    """Open one rasterized page, theme it, and close the source file."""
    with Image.open(path) as page:
        return apply_dark_theme(page, bg, text, link)


def convert_pdf_to_dark(
    input_path: str | Path,
    output_path: str | Path,
    dpi: int = DEFAULT_DPI,
    bg=DEFAULT_BG,
    text=DEFAULT_TEXT,
    link=DEFAULT_LINK,
    batch_size: int = DEFAULT_BATCH,
) -> None:
    """
    Full pipeline: rasterize → apply dark theme → reassemble as PDF.

    All pages are rasterized to PNG files in a temporary directory with
    ``pdftoppm``. When the document has more than ``batch_size`` pages and
    ``_check_qpdf()`` reports qpdf as usable, pages are themed and written
    as one intermediate PDF per batch (so only one batch of images is held
    in memory at a time) and the batch PDFs are merged with ``qpdf``.
    Otherwise every page is themed and written in a single Pillow save,
    which is fine for small documents.

    Args:
        input_path: Source PDF.
        output_path: Destination PDF; its parent directory is created if
            it does not exist.
        dpi: Rasterization resolution. Also passed to Pillow as the PDF
            page resolution so output pages keep their physical size.
        bg: Background color as an ``(r, g, b)`` tuple, components 0-255.
        text: Ink/text color, same format.
        link: Link color, same format.
        batch_size: Maximum pages per batch; must be at least 1.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
        RuntimeError: if ``pdftoppm`` exits non-zero or produces no pages.
        subprocess.CalledProcessError: if the ``qpdf`` merge exits non-zero.
    """
    if batch_size < 1:
        # Fail before the (expensive) rasterization rather than in range().
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    input_path = str(input_path)
    output_path = str(output_path)
    # Without an explicit resolution Pillow writes PDF pages at 72 DPI,
    # which would enlarge every page by a factor of dpi / 72.
    pdf_options = {'save_all': True, 'resolution': float(dpi)}

    with tempfile.TemporaryDirectory() as tmp:
        # 1. Rasterize all pages
        prefix = os.path.join(tmp, 'page')
        result = subprocess.run(
            ['pdftoppm', '-r', str(dpi), '-png', input_path, prefix],
            capture_output=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"pdftoppm failed:\n{result.stderr.decode(errors='replace')}"
            )
        pages = sorted(glob.glob(prefix + '-*.png'))
        if not pages:
            raise RuntimeError(
                "pdftoppm produced no output pages — "
                "is the PDF valid and not password-protected?"
            )
        total = len(pages)
        print(f"  Converting {total} page(s) at {dpi} DPI…", flush=True)

        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # 2. Process in batches
        use_batches = total > batch_size and _check_qpdf()
        if use_batches:
            batch_dir = os.path.join(tmp, 'batches')
            os.makedirs(batch_dir)
            batch_count = (total + batch_size - 1) // batch_size
            batch_files = []
            for start in range(0, total, batch_size):
                batch = pages[start:start + batch_size]
                batch_num = start // batch_size + 1
                batch_path = os.path.join(
                    batch_dir, f'batch_{batch_num:04d}.pdf'
                )
                print(
                    f"  Batch {batch_num}/{batch_count}: "
                    f"pages {start + 1}–{start + len(batch)}",
                    flush=True,
                )
                dark = [_load_dark_page(p, bg, text, link) for p in batch]
                dark[0].save(
                    batch_path, append_images=dark[1:], **pdf_options
                )
                batch_files.append(batch_path)
                # Drop this batch's images before building the next one.
                del dark

            # 3. Merge batches with qpdf
            print("  Merging batches…", flush=True)
            subprocess.run(
                ['qpdf', '--empty', '--pages', *batch_files, '--', output_path],
                check=True,
            )
        else:
            # Single-pass for small documents or when qpdf is unavailable
            dark_pages = []
            for i, p in enumerate(pages, 1):
                if i % 50 == 0 or i == 1:
                    print(f"  Page {i}/{total}", flush=True)
                dark_pages.append(_load_dark_page(p, bg, text, link))
            dark_pages[0].save(
                output_path, append_images=dark_pages[1:], **pdf_options
            )

    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f"  Saved → {output_path}  ({size_mb:.1f} MB)")
# --------------------------------------------------------------------------- #
# CLI
# --------------------------------------------------------------------------- #
def main(argv: list[str] | None = None) -> int:
    """
    Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 if the conversion fails. Invalid command-line
        values are reported by argparse, which exits with status 2.
    """
    def hex_color(value: str) -> tuple:
        # Let argparse turn a bad color into a usage error, not a traceback.
        try:
            return hex_to_rgb(value)
        except ValueError as exc:
            raise argparse.ArgumentTypeError(
                f"invalid hex color {value!r} (expected RRGGBB)"
            ) from exc

    parser = argparse.ArgumentParser(description='Convert a PDF to dark mode.')
    parser.add_argument('input', help='Input PDF path')
    parser.add_argument('output', nargs='?', help='Output PDF path (optional)')
    parser.add_argument('--dpi', type=int, default=DEFAULT_DPI, help='Rasterization DPI (default: 200)')
    parser.add_argument('--batch-size', type=int, default=DEFAULT_BATCH, help='Pages per batch (default: 50)')
    # String defaults are run through `type` by argparse as well, so
    # args.bg / args.text / args.link are always (r, g, b) tuples.
    parser.add_argument('--bg', type=hex_color, default='1f1f31', help='Background hex color (default: 1f1f31)')
    parser.add_argument('--text', type=hex_color, default='e0e0e0', help='Body text hex color (default: e0e0e0)')
    parser.add_argument('--link', type=hex_color, default='5e8bde', help='Link/blue hex color (default: 5e8bde)')
    args = parser.parse_args(argv)

    if args.dpi < 1:
        parser.error('--dpi must be a positive integer')
    if args.batch_size < 1:
        parser.error('--batch-size must be a positive integer')

    if not args.output:
        # Default output: "<input stem>-dark.pdf" next to the input file.
        source = Path(args.input)
        args.output = str(source.parent / f"{source.stem}-dark.pdf")

    try:
        convert_pdf_to_dark(
            args.input,
            args.output,
            dpi=args.dpi,
            bg=args.bg,
            text=args.text,
            link=args.link,
            batch_size=args.batch_size,
        )
    except (RuntimeError, subprocess.CalledProcessError, OSError) as exc:
        # Expected operational failures (external tool missing or failing,
        # unreadable/unwritable files): report concisely and signal failure.
        print(f"error: {exc}", file=sys.stderr)
        return 1
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    sys.exit(main())