Match product.png layout: image, name, stock, barcode, SKU/UPC

- Switched from pandoc markdown to direct LaTeX for precise layout control
- Each product gets its own page matching the mockup:
  • Large bordered product image (centered)
  • Product name (bold, left)
  • Stock + price line
  • Bordered UPC-A barcode (centered)
  • SKU and UPC text (small, left)
- Fixed image handling: every downloaded image is now converted to PNG (the Dollar General CDN serves WebP data under a .jpg extension)
- Compile the generated .tex directly with pdflatex, falling back to xelatex (pandoc strips images from raw .tex)
- Output: 5.6 MB PDF, 7 pages (title page + 6 products) with real images and barcodes
This commit is contained in:
2026-03-21 22:59:29 -07:00
parent e9efcf1460
commit c0ec0f947b

173
disco.py
View File

@@ -165,14 +165,18 @@ def enrich_from_product_page(product: dict) -> dict:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def download_image(url: str, dest: Path) -> Path | None: def download_image(url: str, dest: Path) -> Path | None:
"""Download image from URL, return local path or None.""" """Download image from URL, convert to PNG for LaTeX compatibility."""
if not url: if not url:
return None return None
try: try:
resp = requests.get(url, headers=HEADERS, timeout=15) resp = requests.get(url, headers=HEADERS, timeout=15)
resp.raise_for_status() resp.raise_for_status()
dest.write_bytes(resp.content) # Convert to PNG regardless of source format (handles WebP, etc.)
return dest from io import BytesIO
img = Image.open(BytesIO(resp.content)).convert("RGB")
png_dest = dest.with_suffix(".png")
img.save(png_dest, "PNG")
return png_dest
except Exception as e: except Exception as e:
print(f" ⚠ Image download failed: {e}") print(f" ⚠ Image download failed: {e}")
return None return None
@@ -225,22 +229,48 @@ def generate_barcode(sku: str, dest_dir: Path) -> Path | None:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def generate_catalog_pdf(products: list[dict]) -> Path | None: def generate_catalog_pdf(products: list[dict]) -> Path | None:
"""Build a Markdown file and convert to PDF with pandoc.""" """Build a LaTeX file and convert to PDF with pandoc.
Layout per page (matching product.png mockup):
┌─────────────────────┐
│ │
│ Product Image │ ← large, centered, bordered
│ │
└─────────────────────┘
Name ← product title, bold
Stk ← stock / price info
┌─────────────────────┐
│ UPC-A Barcode │ ← centered, bordered
└─────────────────────┘
SKU: XXXXXXX ← small text
UPC: XXXXXXXXXXXX ← small text
"""
timestamp_label = datetime.now().strftime("%Y-%m-%d %H:%M:%S") timestamp_label = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
timestamp_file = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp_file = datetime.now().strftime("%Y%m%d_%H%M%S")
md_lines = [ # Build LaTeX document directly for precise layout control
"---", latex_lines = [
'title: "Pokemon TCG Product Catalog — Dollar General"', r"\documentclass[11pt,letterpaper]{article}",
f'date: "{timestamp_label}"', r"\usepackage[margin=0.75in]{geometry}",
"geometry: margin=0.75in", r"\usepackage{graphicx}",
"fontsize: 11pt", r"\usepackage{fancybox}",
"---", r"\usepackage{xcolor}",
r"\usepackage{parskip}",
r"\usepackage[utf8]{inputenc}",
r"\usepackage[T1]{fontenc}",
r"\usepackage{lmodern}",
r"\usepackage{hyperref}",
r"\pagestyle{empty}",
r"\begin{document}",
"", "",
f"**Generated**: {timestamp_label} ", # Title page
f"**Products**: {len(products)} Cards & Tins ", r"\begin{center}",
"", r"{\Huge\bfseries Pokemon TCG Product Catalog}\\[0.5cm]",
"\\newpage", r"{\Large Dollar General}\\[0.3cm]",
r"{\large Generated: " + timestamp_label + r"}\\[0.2cm]",
r"{\large " + str(len(products)) + r" Cards \& Tins}",
r"\end{center}",
r"\newpage",
"", "",
] ]
@@ -255,66 +285,109 @@ def generate_catalog_pdf(products: list[dict]) -> Path | None:
img_dest = IMAGES_DIR / f"product_{i}_{sku}.jpg" img_dest = IMAGES_DIR / f"product_{i}_{sku}.jpg"
img_path = download_image(prod.get("image_url"), img_dest) img_path = download_image(prod.get("image_url"), img_dest)
if not img_path: if not img_path:
img_path = make_placeholder(IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30]) img_path = make_placeholder(
IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30]
)
# Generate barcode # Generate barcode
bc_path = generate_barcode(sku, BARCODES_DIR) bc_path = generate_barcode(sku, BARCODES_DIR)
# Relative paths for pandoc (run from OUTPUT_DIR) # Escape LaTeX special characters in text fields
rel_img = os.path.relpath(img_path, OUTPUT_DIR) safe_title = (
rel_bc = os.path.relpath(bc_path, OUTPUT_DIR) if bc_path else None title.replace("&", r"\&")
.replace("%", r"\%")
.replace("$", r"\$")
.replace("#", r"\#")
.replace("_", r"\_")
.replace("é", r"\'e")
)
safe_stock = stock.replace("&", r"\&")
safe_price = price.replace("$", r"\$")
md_lines += [ # Absolute paths for LaTeX
f"## {i}. {title}", abs_img = str(img_path.resolve())
abs_bc = str(bc_path.resolve()) if bc_path else None
latex_lines += [
r"\begin{center}",
# Product image — large, centered, with border
r"\fbox{\includegraphics[width=0.7\textwidth,height=0.45\textheight,keepaspectratio]{"
+ abs_img
+ r"}}",
r"\end{center}",
r"\vspace{0.4cm}",
"", "",
f"![{title}]({rel_img}){{ width=200px }}", # Name — bold, large
r"{\Large\bfseries " + safe_title + r"}",
"", "",
"| Field | Value |", r"\vspace{0.15cm}",
"|-------|-------|", "",
f"| **Price** | {price} |", # Stock and price
f"| **Stock** | {stock} |", r"{\large " + safe_stock + r" \hfill " + safe_price + r"}",
f"| **SKU** | `{sku}` |", "",
f"| **UPC** | `{upc}` |", r"\vspace{0.5cm}",
"", "",
] ]
if rel_bc: # Barcode — centered, bordered
md_lines += [ if abs_bc:
f"![UPC-A Barcode]({rel_bc}){{ width=250px }}", latex_lines += [
r"\begin{center}",
r"\fbox{\includegraphics[width=0.55\textwidth]{"
+ abs_bc
+ r"}}",
r"\end{center}",
r"\vspace{0.15cm}",
"", "",
] ]
md_lines += ["\\newpage", ""] # SKU and UPC — small text
latex_lines += [
r"{\small SKU: \texttt{" + sku + r"}}\\",
r"{\small UPC: \texttt{" + upc + r"}}",
"",
]
# Page break between products (not after last)
if i < len(products):
latex_lines.append(r"\newpage")
latex_lines.append("")
print(f" ✅ [{i}/{len(products)}] {title}") print(f" ✅ [{i}/{len(products)}] {title}")
# Write markdown latex_lines.append(r"\end{document}")
md_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.md"
md_file.write_text("\n".join(md_lines), encoding="utf-8")
print(f"\n📝 Markdown: {md_file}")
# Convert to PDF # Write .tex file
tex_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.tex"
tex_file.write_text("\n".join(latex_lines), encoding="utf-8")
print(f"\n📝 LaTeX source: {tex_file}")
# Compile to PDF with pdflatex directly (pandoc strips images from raw .tex)
pdf_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.pdf" pdf_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.pdf"
engines = ["pdflatex", "xelatex"]
for engine in engines: for engine in ["pdflatex", "xelatex"]:
try: try:
cmd = [ result = subprocess.run(
"pandoc", str(md_file), [engine, "-interaction=nonstopmode",
"-o", str(pdf_file), f"-output-directory={OUTPUT_DIR}", str(tex_file)],
f"--pdf-engine={engine}", capture_output=True, text=True, timeout=120,
"-V", "colorlinks=true", )
] if pdf_file.exists() and pdf_file.stat().st_size > 1000:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) # Clean up LaTeX temp files
if result.returncode == 0: for ext in [".aux", ".log", ".out"]:
print(f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)") tmp = pdf_file.with_suffix(ext)
if tmp.exists():
tmp.unlink()
print(
f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)"
)
return pdf_file return pdf_file
else: except FileNotFoundError:
continue continue
except Exception: except Exception:
continue continue
print(f"⚠ PDF generation failed. Markdown available at: {md_file}") print(f"⚠ PDF generation failed. LaTeX source available at: {tex_file}")
return None return None
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------