Match product.png layout: image, name, stock, barcode, SKU/UPC

- Switched from pandoc markdown to direct LaTeX for precise layout control
- Each product gets its own page matching the mockup:
  • Large bordered product image (centered)
  • Product name (bold, left)
  • Stock + price line
  • Bordered UPC-A barcode (centered)
  • SKU and UPC text (small, left)
- Fixed WebP→PNG image conversion (the Dollar General CDN serves WebP data under a .jpg extension)
- Compile directly with pdflatex, falling back to xelatex (pandoc strips images from raw .tex)
- Output: 5.6MB PDF, 7 pages (1 title page + 6 product pages), with real images and barcodes
This commit is contained in:
2026-03-21 22:59:29 -07:00
parent e9efcf1460
commit c0ec0f947b

175
disco.py
View File

@@ -165,14 +165,18 @@ def enrich_from_product_page(product: dict) -> dict:
# ---------------------------------------------------------------------------
def download_image(url: str, dest: Path) -> Path | None:
"""Download image from URL, return local path or None."""
"""Download image from URL, convert to PNG for LaTeX compatibility."""
if not url:
return None
try:
resp = requests.get(url, headers=HEADERS, timeout=15)
resp.raise_for_status()
dest.write_bytes(resp.content)
return dest
# Convert to PNG regardless of source format (handles WebP, etc.)
from io import BytesIO
img = Image.open(BytesIO(resp.content)).convert("RGB")
png_dest = dest.with_suffix(".png")
img.save(png_dest, "PNG")
return png_dest
except Exception as e:
print(f" ⚠ Image download failed: {e}")
return None
@@ -225,22 +229,48 @@ def generate_barcode(sku: str, dest_dir: Path) -> Path | None:
# ---------------------------------------------------------------------------
def generate_catalog_pdf(products: list[dict]) -> Path | None:
"""Build a Markdown file and convert to PDF with pandoc."""
"""Build a LaTeX file and convert to PDF with pandoc.
Layout per page (matching product.png mockup):
┌─────────────────────┐
│ │
│ Product Image │ ← large, centered, bordered
│ │
└─────────────────────┘
Name ← product title, bold
Stk ← stock / price info
┌─────────────────────┐
│ UPC-A Barcode │ ← centered, bordered
└─────────────────────┘
SKU: XXXXXXX ← small text
UPC: XXXXXXXXXXXX ← small text
"""
timestamp_label = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
timestamp_file = datetime.now().strftime("%Y%m%d_%H%M%S")
md_lines = [
"---",
'title: "Pokemon TCG Product Catalog — Dollar General"',
f'date: "{timestamp_label}"',
"geometry: margin=0.75in",
"fontsize: 11pt",
"---",
# Build LaTeX document directly for precise layout control
latex_lines = [
r"\documentclass[11pt,letterpaper]{article}",
r"\usepackage[margin=0.75in]{geometry}",
r"\usepackage{graphicx}",
r"\usepackage{fancybox}",
r"\usepackage{xcolor}",
r"\usepackage{parskip}",
r"\usepackage[utf8]{inputenc}",
r"\usepackage[T1]{fontenc}",
r"\usepackage{lmodern}",
r"\usepackage{hyperref}",
r"\pagestyle{empty}",
r"\begin{document}",
"",
f"**Generated**: {timestamp_label} ",
f"**Products**: {len(products)} Cards & Tins ",
"",
"\\newpage",
# Title page
r"\begin{center}",
r"{\Huge\bfseries Pokemon TCG Product Catalog}\\[0.5cm]",
r"{\Large Dollar General}\\[0.3cm]",
r"{\large Generated: " + timestamp_label + r"}\\[0.2cm]",
r"{\large " + str(len(products)) + r" Cards \& Tins}",
r"\end{center}",
r"\newpage",
"",
]
@@ -255,66 +285,109 @@ def generate_catalog_pdf(products: list[dict]) -> Path | None:
img_dest = IMAGES_DIR / f"product_{i}_{sku}.jpg"
img_path = download_image(prod.get("image_url"), img_dest)
if not img_path:
img_path = make_placeholder(IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30])
img_path = make_placeholder(
IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30]
)
# Generate barcode
bc_path = generate_barcode(sku, BARCODES_DIR)
# Relative paths for pandoc (run from OUTPUT_DIR)
rel_img = os.path.relpath(img_path, OUTPUT_DIR)
rel_bc = os.path.relpath(bc_path, OUTPUT_DIR) if bc_path else None
# Escape LaTeX special characters in text fields
safe_title = (
title.replace("&", r"\&")
.replace("%", r"\%")
.replace("$", r"\$")
.replace("#", r"\#")
.replace("_", r"\_")
.replace("é", r"\'e")
)
safe_stock = stock.replace("&", r"\&")
safe_price = price.replace("$", r"\$")
md_lines += [
f"## {i}. {title}",
# Absolute paths for LaTeX
abs_img = str(img_path.resolve())
abs_bc = str(bc_path.resolve()) if bc_path else None
latex_lines += [
r"\begin{center}",
# Product image — large, centered, with border
r"\fbox{\includegraphics[width=0.7\textwidth,height=0.45\textheight,keepaspectratio]{"
+ abs_img
+ r"}}",
r"\end{center}",
r"\vspace{0.4cm}",
"",
f"![{title}]({rel_img}){{ width=200px }}",
# Name — bold, large
r"{\Large\bfseries " + safe_title + r"}",
"",
"| Field | Value |",
"|-------|-------|",
f"| **Price** | {price} |",
f"| **Stock** | {stock} |",
f"| **SKU** | `{sku}` |",
f"| **UPC** | `{upc}` |",
r"\vspace{0.15cm}",
"",
# Stock and price
r"{\large " + safe_stock + r" \hfill " + safe_price + r"}",
"",
r"\vspace{0.5cm}",
"",
]
if rel_bc:
md_lines += [
f"![UPC-A Barcode]({rel_bc}){{ width=250px }}",
# Barcode — centered, bordered
if abs_bc:
latex_lines += [
r"\begin{center}",
r"\fbox{\includegraphics[width=0.55\textwidth]{"
+ abs_bc
+ r"}}",
r"\end{center}",
r"\vspace{0.15cm}",
"",
]
md_lines += ["\\newpage", ""]
# SKU and UPC — small text
latex_lines += [
r"{\small SKU: \texttt{" + sku + r"}}\\",
r"{\small UPC: \texttt{" + upc + r"}}",
"",
]
# Page break between products (not after last)
if i < len(products):
latex_lines.append(r"\newpage")
latex_lines.append("")
print(f" ✅ [{i}/{len(products)}] {title}")
# Write markdown
md_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.md"
md_file.write_text("\n".join(md_lines), encoding="utf-8")
print(f"\n📝 Markdown: {md_file}")
latex_lines.append(r"\end{document}")
# Convert to PDF
# Write .tex file
tex_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.tex"
tex_file.write_text("\n".join(latex_lines), encoding="utf-8")
print(f"\n📝 LaTeX source: {tex_file}")
# Compile to PDF with pdflatex directly (pandoc strips images from raw .tex)
pdf_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.pdf"
engines = ["pdflatex", "xelatex"]
for engine in engines:
for engine in ["pdflatex", "xelatex"]:
try:
cmd = [
"pandoc", str(md_file),
"-o", str(pdf_file),
f"--pdf-engine={engine}",
"-V", "colorlinks=true",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0:
print(f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)")
result = subprocess.run(
[engine, "-interaction=nonstopmode",
f"-output-directory={OUTPUT_DIR}", str(tex_file)],
capture_output=True, text=True, timeout=120,
)
if pdf_file.exists() and pdf_file.stat().st_size > 1000:
# Clean up LaTeX temp files
for ext in [".aux", ".log", ".out"]:
tmp = pdf_file.with_suffix(ext)
if tmp.exists():
tmp.unlink()
print(
f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)"
)
return pdf_file
else:
continue
except FileNotFoundError:
continue
except Exception:
continue
print(f"⚠ PDF generation failed. Markdown available at: {md_file}")
print(f"⚠ PDF generation failed. LaTeX source available at: {tex_file}")
return None
# ---------------------------------------------------------------------------