mirror of
https://github.com/Anon-Planet/thgtoa.git
synced 2026-06-10 15:52:29 +02:00
1/8 feat(pdf): add pixel-based dark mode PDF converter
Replaces the broken --prefers-color-scheme=dark Chromium flag with a pixel-level converter. Rasterizes pages via pdftoppm, remaps colors to the hacker theme (#1f1f31 bg, #e0e0e0 text, #5e8bde links), and reassembles with qpdf. Processes in batches of 50 pages to avoid OOM on large documents like the 414-page guide. Signed-off-by: nopeitsnothing <no@anonymousplanet.org>
This commit is contained in:
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dark-mode PDF converter (pixel-based, batch-safe).
|
||||
|
||||
Rasterizes each page with pdftoppm, applies the hacker theme palette
|
||||
pixel-by-pixel, then reassembles into a PDF. Processes in batches of 50
|
||||
pages to stay within memory limits on large documents, then merges with qpdf.
|
||||
|
||||
Usage:
|
||||
python scripts/convert.py INPUT.pdf [OUTPUT.pdf]
|
||||
python scripts/convert.py INPUT.pdf [OUTPUT.pdf] [--dpi 200]
|
||||
[--bg 1f1f31] [--text e0e0e0] [--link 5e8bde]
|
||||
[--batch-size 50]
|
||||
|
||||
Examples:
|
||||
python scripts/convert.py export/thgtoa.pdf export/thgtoa-dark.pdf
|
||||
python scripts/convert.py export/thgtoa.pdf --dpi 150 --bg 0d1117
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
# Defaults (Hacker theme)
# --------------------------------------------------------------------------- #
# Colours are (R, G, B) tuples of 0-255 ints, spelled here as RRGGBB hex
# strings so they can be compared at a glance with the CLI's hex options.
DEFAULT_BG = tuple(bytes.fromhex('1f1f31'))    # page background
DEFAULT_TEXT = tuple(bytes.fromhex('e0e0e0'))  # body text / ink
DEFAULT_LINK = tuple(bytes.fromhex('5e8bde'))  # blue-ish (link) content
DEFAULT_DPI = 200     # default rasterization resolution
DEFAULT_BATCH = 50    # default number of pages per batch
|
||||
|
||||
|
||||
def hex_to_rgb(h: str) -> tuple:
    """Convert a hex colour string to an ``(r, g, b)`` tuple of 0-255 ints.

    Accepts ``RRGGBB`` or the CSS-style shorthand ``RGB`` (each digit is
    doubled), with or without leading ``#`` characters. Surrounding
    whitespace is ignored.

    Args:
        h: Colour string such as ``'1f1f31'``, ``'#5e8bde'`` or ``'fff'``.

    Returns:
        A 3-tuple of ints in the range 0-255.

    Raises:
        ValueError: if, after stripping, the string is not exactly 3 or 6
            hexadecimal digits.
    """
    digits = h.strip().lstrip('#')
    if len(digits) == 3:
        # Expand shorthand: 'abc' -> 'aabbcc'.
        digits = ''.join(ch * 2 for ch in digits)
    # Check characters explicitly: int(..., 16) alone would also accept
    # signs, whitespace and underscores inside a two-character slice.
    hex_chars = '0123456789abcdefABCDEF'
    if len(digits) != 6 or any(ch not in hex_chars for ch in digits):
        raise ValueError(f"invalid hex color {h!r}: expected RGB or RRGGBB")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
|
||||
|
||||
|
||||
def apply_dark_theme(
    img: Image.Image,
    bg=DEFAULT_BG,
    text=DEFAULT_TEXT,
    link=DEFAULT_LINK,
) -> Image.Image:
    """
    Remap a white-background page image to a dark theme.

    Each pixel is classified by its luma
    (``0.299*R + 0.587*G + 0.114*B`` on a 0-1 scale):

    - luma >= 0.85 (near-white)          → ``bg`` color
    - darker and blue-dominant pixels    → blended from ``bg`` toward ``link``
    - every other darker pixel (ink)     → blended from ``bg`` toward ``text``

    The blend weight is ``(1 - luma) / 0.85`` clipped to [0, 1], so lighter
    (e.g. anti-aliased) pixels stay closer to the background color.

    Args:
        img: Page image; it is converted to RGB before processing.
        bg: Background color as an (R, G, B) sequence of 0-255 values.
        text: Text/ink color as an (R, G, B) sequence of 0-255 values.
        link: Link color as an (R, G, B) sequence of 0-255 values.

    Returns:
        A new RGB image of the same size with the palette applied.
    """
    # One float32 copy of the pixels is enough: it is only read below, so the
    # blue-dominance tests can index it directly.
    rgb = np.array(img.convert('RGB'), dtype=np.float32)
    norm = rgb / 255.0

    lightness = (
        0.299 * norm[:, :, 0]
        + 0.587 * norm[:, :, 1]
        + 0.114 * norm[:, :, 2]
    )

    # Blue-dominance is tested on the 0-255 scale.
    r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    is_dark = lightness < 0.85
    link_mask = is_dark & (b > 100) & (b > r * 1.3) & (b > g * 0.9)
    content_mask = is_dark & ~link_mask
    blend = ((1.0 - lightness) / 0.85).clip(0, 1)

    out = np.zeros_like(norm)
    # Work one channel at a time so temporaries are H x W rather than H x W x 3.
    for i, (bg_c, text_c, link_c) in enumerate(zip(bg, text, link)):
        bg_c, text_c, link_c = bg_c / 255.0, text_c / 255.0, link_c / 255.0
        channel = np.full(lightness.shape, bg_c, dtype=np.float32)
        channel = np.where(content_mask, bg_c + blend * (text_c - bg_c), channel)
        # Link pixels are applied last; the two masks are disjoint anyway.
        channel = np.where(link_mask, bg_c + blend * (link_c - bg_c), channel)
        out[:, :, i] = channel

    # Round to nearest before the uint8 cast: plain truncation can land a
    # fully blended pixel one level below the requested color because of
    # float32 rounding error.
    return Image.fromarray(np.rint(out * 255).clip(0, 255).astype('uint8'))
|
||||
|
||||
|
||||
def _check_qpdf() -> bool:
    """Return True when ``qpdf --version`` can be run and exits with status 0.

    Returns False, rather than raising, when the executable cannot be
    launched at all (for example when it is not installed), so callers can
    fall back to a code path that does not need qpdf.
    """
    try:
        probe = subprocess.run(['qpdf', '--version'], capture_output=True)
    except OSError:
        # subprocess.run raises FileNotFoundError / PermissionError (both
        # OSError subclasses) when the program is missing or not executable.
        return False
    return probe.returncode == 0
|
||||
|
||||
|
||||
def _darken_page(page_path: str, bg, text, link) -> Image.Image:
    """Open one rasterized page, apply the dark theme, and close the file."""
    with Image.open(page_path) as page:
        return apply_dark_theme(page, bg, text, link)


def convert_pdf_to_dark(
    input_path: str | Path,
    output_path: str | Path,
    dpi: int = DEFAULT_DPI,
    bg=DEFAULT_BG,
    text=DEFAULT_TEXT,
    link=DEFAULT_LINK,
    batch_size: int = DEFAULT_BATCH,
) -> None:
    """
    Full pipeline: rasterize → apply dark theme → reassemble as PDF.

    For large documents, pages are processed in batches of `batch_size` to
    avoid OOM, then merged with qpdf. Falls back to single-pass Pillow save
    if qpdf is not available (fine for small documents).

    Args:
        input_path: Source PDF.
        output_path: Destination PDF; its parent directory is created if
            needed.
        dpi: Rasterization resolution passed to pdftoppm. It is also written
            into the output so pages keep their physical size.
        bg, text, link: Theme colors as (R, G, B) sequences of 0-255 values.
        batch_size: Maximum number of pages held in memory per batch.

    Raises:
        ValueError: if `dpi` or `batch_size` is less than 1.
        RuntimeError: if pdftoppm fails or produces no pages.
        subprocess.CalledProcessError: if the qpdf merge step fails.
    """
    # Reject values that would otherwise fail late: range() with a zero step,
    # or a qpdf merge invoked with no input files.
    if dpi < 1:
        raise ValueError(f"dpi must be >= 1, got {dpi}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    input_path = str(input_path)
    output_path = str(output_path)

    # Pillow's PDF writer assumes 72 DPI unless told otherwise, which would
    # scale every page up by dpi/72. Passing the rasterization DPI keeps the
    # original physical page size. The format is given explicitly so the
    # output path does not need a .pdf suffix.
    pdf_opts = {'format': 'PDF', 'save_all': True, 'resolution': float(dpi)}

    with tempfile.TemporaryDirectory() as tmp:
        # 1. Rasterize all pages
        prefix = os.path.join(tmp, 'page')
        result = subprocess.run(
            ['pdftoppm', '-r', str(dpi), '-png', input_path, prefix],
            capture_output=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"pdftoppm failed:\n{result.stderr.decode()}"
            )

        pages = sorted(glob.glob(prefix + '-*.png'))
        if not pages:
            raise RuntimeError(
                "pdftoppm produced no output pages — "
                "is the PDF valid and not password-protected?"
            )

        total = len(pages)
        print(f"  Converting {total} page(s) at {dpi} DPI…", flush=True)

        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # 2. Process in batches
        use_batches = total > batch_size and _check_qpdf()

        if use_batches:
            batch_dir = os.path.join(tmp, 'batches')
            os.makedirs(batch_dir)
            batch_files = []
            batch_count = (total + batch_size - 1) // batch_size

            for start in range(0, total, batch_size):
                batch = pages[start:start + batch_size]
                batch_num = start // batch_size + 1
                batch_path = os.path.join(batch_dir, f'batch_{batch_num:04d}.pdf')

                print(
                    f"  Batch {batch_num}/{batch_count}: "
                    f"pages {start + 1}–{start + len(batch)}",
                    flush=True,
                )

                dark = [_darken_page(p, bg, text, link) for p in batch]
                dark[0].save(batch_path, append_images=dark[1:], **pdf_opts)
                batch_files.append(batch_path)
                # Drop this batch's images before the next batch is built.
                del dark

            # 3. Merge batches with qpdf
            print("  Merging batches…", flush=True)
            subprocess.run(
                ['qpdf', '--empty', '--pages'] + batch_files + ['--', output_path],
                check=True,
            )

        else:
            # Single-pass for small documents or when qpdf is unavailable
            dark_pages = []
            for i, p in enumerate(pages, 1):
                if i % 50 == 0 or i == 1:
                    print(f"  Page {i}/{total}", flush=True)
                dark_pages.append(_darken_page(p, bg, text, link))

            dark_pages[0].save(
                output_path,
                append_images=dark_pages[1:],
                **pdf_opts,
            )

    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f"  Saved → {output_path}  ({size_mb:.1f} MB)")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# CLI
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _hex_color_arg(value: str) -> tuple:
    """argparse ``type=`` adapter: parse a hex color or report a usage error."""
    try:
        return hex_to_rgb(value)
    except ValueError as exc:
        raise argparse.ArgumentTypeError(
            f"invalid hex color {value!r} (expected RRGGBB)"
        ) from exc


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 if the conversion failed. Invalid command-line
        arguments exit through argparse's usual usage error instead.
    """
    parser = argparse.ArgumentParser(description='Convert a PDF to dark mode.')
    parser.add_argument('input', help='Input PDF path')
    parser.add_argument('output', nargs='?', help='Output PDF path (optional)')
    parser.add_argument(
        '--dpi', type=int, default=DEFAULT_DPI,
        help=f'Rasterization DPI (default: {DEFAULT_DPI})',
    )
    parser.add_argument(
        '--batch-size', type=int, default=DEFAULT_BATCH,
        help=f'Pages per batch (default: {DEFAULT_BATCH})',
    )
    # Color defaults and their help text come from the module constants so
    # the CLI cannot drift away from the library defaults.
    parser.add_argument(
        '--bg', type=_hex_color_arg, default=tuple(DEFAULT_BG),
        help=f'Background hex color (default: {bytes(DEFAULT_BG).hex()})',
    )
    parser.add_argument(
        '--text', type=_hex_color_arg, default=tuple(DEFAULT_TEXT),
        help=f'Body text hex color (default: {bytes(DEFAULT_TEXT).hex()})',
    )
    parser.add_argument(
        '--link', type=_hex_color_arg, default=tuple(DEFAULT_LINK),
        help=f'Link/blue hex color (default: {bytes(DEFAULT_LINK).hex()})',
    )
    args = parser.parse_args(argv)

    # Turn obviously bad values into usage errors before any work starts.
    if args.dpi < 1:
        parser.error('--dpi must be a positive integer')
    if args.batch_size < 1:
        parser.error('--batch-size must be a positive integer')
    if not Path(args.input).is_file():
        parser.error(f'input file not found: {args.input}')

    if not args.output:
        # Default: "<input stem>-dark.pdf" next to the input file.
        base = Path(args.input).stem
        args.output = str(Path(args.input).parent / f"{base}-dark.pdf")

    try:
        convert_pdf_to_dark(
            args.input,
            args.output,
            dpi=args.dpi,
            bg=args.bg,
            text=args.text,
            link=args.link,
            batch_size=args.batch_size,
        )
    except (RuntimeError, subprocess.CalledProcessError, OSError) as exc:
        # Report tool and I/O failures as a short message plus a non-zero
        # exit status instead of a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1
    return 0
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    sys.exit(main())
|
||||
Reference in New Issue
Block a user