mirror of
https://github.com/Anon-Planet/thgtoa.git
synced 2026-06-10 15:52:29 +02:00
1/8 feat(pdf): add pixel-based dark mode PDF converter
Replaces the broken --prefers-color-scheme=dark Chromium flag with a pixel-level converter. Rasterizes pages via pdftoppm, remaps colors to the hacker theme (#1f1f31 bg, #e0e0e0 text, #5e8bde links), and reassembles with qpdf. Processes in batches of 50 pages to avoid OOM on large documents like the 414-page guide. Signed-off-by: nopeitsnothing <no@anonymousplanet.org>
This commit is contained in:
@@ -0,0 +1,226 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Dark-mode PDF converter (pixel-based, batch-safe).
|
||||||
|
|
||||||
|
Rasterizes each page with pdftoppm, applies the hacker theme palette
|
||||||
|
pixel-by-pixel, then reassembles into a PDF. Processes in batches of 50
|
||||||
|
pages to stay within memory limits on large documents, then merges with qpdf.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/convert.py INPUT.pdf [OUTPUT.pdf]
|
||||||
|
python scripts/convert.py INPUT.pdf [OUTPUT.pdf] [--dpi 200]
|
||||||
|
[--bg 1f1f31] [--text e0e0e0] [--link 5e8bde]
|
||||||
|
[--batch-size 50]
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
python scripts/convert.py export/thgtoa.pdf export/thgtoa-dark.pdf
|
||||||
|
python scripts/convert.py export/thgtoa.pdf --dpi 150 --bg 0d1117
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# Defaults (Hacker theme)
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
DEFAULT_BG = (0x1f, 0x1f, 0x31)
|
||||||
|
DEFAULT_TEXT = (0xe0, 0xe0, 0xe0)
|
||||||
|
DEFAULT_LINK = (0x5e, 0x8b, 0xde)
|
||||||
|
DEFAULT_DPI = 200
|
||||||
|
DEFAULT_BATCH = 50
|
||||||
|
|
||||||
|
|
||||||
|
def hex_to_rgb(h: str) -> tuple:
|
||||||
|
h = h.lstrip('#')
|
||||||
|
return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))
|
||||||
|
|
||||||
|
|
||||||
|
def apply_dark_theme(
|
||||||
|
img: Image.Image,
|
||||||
|
bg=DEFAULT_BG,
|
||||||
|
text=DEFAULT_TEXT,
|
||||||
|
link=DEFAULT_LINK,
|
||||||
|
) -> Image.Image:
|
||||||
|
"""
|
||||||
|
Remap a white-background page image to a dark theme.
|
||||||
|
- Near-white pixels → bg color
|
||||||
|
- Dark pixels (ink/text) → text color
|
||||||
|
- Blue-ish pixels → link color
|
||||||
|
"""
|
||||||
|
arr = np.array(img.convert('RGB'), dtype=np.float32)
|
||||||
|
orig = arr.copy()
|
||||||
|
norm = arr / 255.0
|
||||||
|
|
||||||
|
lightness = (
|
||||||
|
0.299 * norm[:, :, 0]
|
||||||
|
+ 0.587 * norm[:, :, 1]
|
||||||
|
+ 0.114 * norm[:, :, 2]
|
||||||
|
)
|
||||||
|
|
||||||
|
r, g, b = orig[:, :, 0], orig[:, :, 1], orig[:, :, 2]
|
||||||
|
link_mask = (
|
||||||
|
(b > 100)
|
||||||
|
& (b > r * 1.3)
|
||||||
|
& (b > g * 0.9)
|
||||||
|
& (lightness < 0.85)
|
||||||
|
)
|
||||||
|
content_mask = (lightness < 0.85) & ~link_mask
|
||||||
|
blend = ((1.0 - lightness) / 0.85).clip(0, 1)
|
||||||
|
|
||||||
|
bg_f = [c / 255.0 for c in bg]
|
||||||
|
text_f = [c / 255.0 for c in text]
|
||||||
|
link_f = [c / 255.0 for c in link]
|
||||||
|
|
||||||
|
out = np.zeros_like(norm)
|
||||||
|
for i, (b_c, t, lc) in enumerate(zip(bg_f, text_f, link_f)):
|
||||||
|
channel = np.full(lightness.shape, b_c)
|
||||||
|
channel = np.where(content_mask, b_c + blend * (t - b_c), channel)
|
||||||
|
channel = np.where(link_mask, b_c + blend * (lc - b_c), channel)
|
||||||
|
out[:, :, i] = channel
|
||||||
|
|
||||||
|
return Image.fromarray((out * 255).clip(0, 255).astype('uint8'))
|
||||||
|
|
||||||
|
|
||||||
|
def _check_qpdf() -> bool:
|
||||||
|
return subprocess.run(
|
||||||
|
['qpdf', '--version'], capture_output=True
|
||||||
|
).returncode == 0
|
||||||
|
|
||||||
|
|
||||||
|
def convert_pdf_to_dark(
|
||||||
|
input_path: str | Path,
|
||||||
|
output_path: str | Path,
|
||||||
|
dpi: int = DEFAULT_DPI,
|
||||||
|
bg=DEFAULT_BG,
|
||||||
|
text=DEFAULT_TEXT,
|
||||||
|
link=DEFAULT_LINK,
|
||||||
|
batch_size: int = DEFAULT_BATCH,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Full pipeline: rasterize → apply dark theme → reassemble as PDF.
|
||||||
|
|
||||||
|
For large documents, pages are processed in batches of `batch_size` to
|
||||||
|
avoid OOM, then merged with qpdf. Falls back to single-pass Pillow save
|
||||||
|
if qpdf is not available (fine for small documents).
|
||||||
|
"""
|
||||||
|
input_path = str(input_path)
|
||||||
|
output_path = str(output_path)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
# 1. Rasterize all pages
|
||||||
|
prefix = os.path.join(tmp, 'page')
|
||||||
|
result = subprocess.run(
|
||||||
|
['pdftoppm', '-r', str(dpi), '-png', input_path, prefix],
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"pdftoppm failed:\n{result.stderr.decode()}"
|
||||||
|
)
|
||||||
|
|
||||||
|
pages = sorted(glob.glob(prefix + '-*.png'))
|
||||||
|
if not pages:
|
||||||
|
raise RuntimeError(
|
||||||
|
"pdftoppm produced no output pages — "
|
||||||
|
"is the PDF valid and not password-protected?"
|
||||||
|
)
|
||||||
|
|
||||||
|
total = len(pages)
|
||||||
|
print(f" Converting {total} page(s) at {dpi} DPI…", flush=True)
|
||||||
|
|
||||||
|
out_dir = os.path.dirname(output_path)
|
||||||
|
if out_dir:
|
||||||
|
os.makedirs(out_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# 2. Process in batches
|
||||||
|
use_batches = total > batch_size and _check_qpdf()
|
||||||
|
|
||||||
|
if use_batches:
|
||||||
|
batch_dir = os.path.join(tmp, 'batches')
|
||||||
|
os.makedirs(batch_dir)
|
||||||
|
batch_files = []
|
||||||
|
|
||||||
|
for start in range(0, total, batch_size):
|
||||||
|
batch = pages[start:start + batch_size]
|
||||||
|
batch_num = start // batch_size + 1
|
||||||
|
batch_path = os.path.join(batch_dir, f'batch_{batch_num:04d}.pdf')
|
||||||
|
|
||||||
|
print(
|
||||||
|
f" Batch {batch_num}/{(total + batch_size - 1) // batch_size}: "
|
||||||
|
f"pages {start + 1}–{start + len(batch)}",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
dark = [apply_dark_theme(Image.open(p), bg, text, link) for p in batch]
|
||||||
|
dark[0].save(batch_path, save_all=True, append_images=dark[1:])
|
||||||
|
batch_files.append(batch_path)
|
||||||
|
del dark
|
||||||
|
|
||||||
|
# 3. Merge batches with qpdf
|
||||||
|
print(" Merging batches…", flush=True)
|
||||||
|
subprocess.run(
|
||||||
|
['qpdf', '--empty', '--pages'] + batch_files + ['--', output_path],
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Single-pass for small documents or when qpdf is unavailable
|
||||||
|
dark_pages = []
|
||||||
|
for i, p in enumerate(pages, 1):
|
||||||
|
if i % 50 == 0 or i == 1:
|
||||||
|
print(f" Page {i}/{total}", flush=True)
|
||||||
|
dark_pages.append(apply_dark_theme(Image.open(p), bg, text, link))
|
||||||
|
|
||||||
|
dark_pages[0].save(
|
||||||
|
output_path,
|
||||||
|
save_all=True,
|
||||||
|
append_images=dark_pages[1:],
|
||||||
|
)
|
||||||
|
|
||||||
|
size_mb = os.path.getsize(output_path) / 1024 / 1024
|
||||||
|
print(f" Saved → {output_path} ({size_mb:.1f} MB)")
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# CLI
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
def main() -> int:
|
||||||
|
parser = argparse.ArgumentParser(description='Convert a PDF to dark mode.')
|
||||||
|
parser.add_argument('input', help='Input PDF path')
|
||||||
|
parser.add_argument('output', nargs='?', help='Output PDF path (optional)')
|
||||||
|
parser.add_argument('--dpi', type=int, default=DEFAULT_DPI, help='Rasterization DPI (default: 200)')
|
||||||
|
parser.add_argument('--batch-size', type=int, default=DEFAULT_BATCH, help='Pages per batch (default: 50)')
|
||||||
|
parser.add_argument('--bg', default='1f1f31', help='Background hex color (default: 1f1f31)')
|
||||||
|
parser.add_argument('--text', default='e0e0e0', help='Body text hex color (default: e0e0e0)')
|
||||||
|
parser.add_argument('--link', default='5e8bde', help='Link/blue hex color (default: 5e8bde)')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if not args.output:
|
||||||
|
base = Path(args.input).stem
|
||||||
|
args.output = str(Path(args.input).parent / f"{base}-dark.pdf")
|
||||||
|
|
||||||
|
convert_pdf_to_dark(
|
||||||
|
args.input,
|
||||||
|
args.output,
|
||||||
|
dpi=args.dpi,
|
||||||
|
bg=hex_to_rgb(args.bg),
|
||||||
|
text=hex_to_rgb(args.text),
|
||||||
|
link=hex_to_rgb(args.link),
|
||||||
|
batch_size=args.batch_size,
|
||||||
|
)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user