From c0ec0f947ba3b5989358043ff4a9d6daca892212 Mon Sep 17 00:00:00 2001
From: pi-bot-01 <pi-bot-01@dominat.us>
Date: Sat, 21 Mar 2026 22:59:29 -0700
Subject: [PATCH] Match product.png layout: image, name, stock, barcode,
 SKU/UPC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Switched from pandoc markdown to direct LaTeX for precise layout control
- Each product gets its own page matching the mockup:
  • Large bordered product image (centered)
  • Product name (bold, left)
  • Stock + price line
  • Bordered UPC-A barcode (centered)
  • SKU and UPC text (small, left)
- Fixed image handling: convert downloads to PNG (DG CDN serves WebP as .jpg)
- Compile the .tex directly with pdflatex, falling back to xelatex
  (pandoc strips images from raw .tex)
- Output: 5.6MB PDF, 7 pages, 6 products with real images and barcodes
---
 disco.py | 175 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 124 insertions(+), 51 deletions(-)

diff --git a/disco.py b/disco.py
index dc2e78d..71f1fcc 100644
--- a/disco.py
+++ b/disco.py
@@ -165,14 +165,18 @@ def enrich_from_product_page(product: dict) -> dict:
 # ---------------------------------------------------------------------------
 
 def download_image(url: str, dest: Path) -> Path | None:
-    """Download image from URL, return local path or None."""
+    """Download image from URL, convert to PNG for LaTeX compatibility."""
     if not url:
         return None
     try:
         resp = requests.get(url, headers=HEADERS, timeout=15)
         resp.raise_for_status()
-        dest.write_bytes(resp.content)
-        return dest
+        # Convert to PNG regardless of source format (handles WebP, etc.)
+        from io import BytesIO
+        img = Image.open(BytesIO(resp.content)).convert("RGB")
+        png_dest = dest.with_suffix(".png")
+        img.save(png_dest, "PNG")
+        return png_dest
     except Exception as e:
         print(f"   ⚠ Image download failed: {e}")
         return None
@@ -225,22 +229,48 @@ def generate_barcode(sku: str, dest_dir: Path) -> Path | None:
 # ---------------------------------------------------------------------------
 
 def generate_catalog_pdf(products: list[dict]) -> Path | None:
-    """Build a Markdown file and convert to PDF with pandoc."""
+    """Build a LaTeX file and compile it to PDF with pdflatex or xelatex.
+
+    Layout per page (matching product.png mockup):
+        ┌─────────────────────┐
+        │                     │
+        │    Product Image    │   ← large, centered, bordered
+        │                     │
+        └─────────────────────┘
+        Name                      ← product title, bold
+        Stk                       ← stock / price info
+        ┌─────────────────────┐
+        │    UPC-A Barcode    │   ← centered, bordered
+        └─────────────────────┘
+        SKU: XXXXXXX              ← small text
+        UPC: XXXXXXXXXXXX         ← small text
+    """
     timestamp_label = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     timestamp_file = datetime.now().strftime("%Y%m%d_%H%M%S")
 
-    md_lines = [
-        "---",
-        'title: "Pokemon TCG Product Catalog — Dollar General"',
-        f'date: "{timestamp_label}"',
-        "geometry: margin=0.75in",
-        "fontsize: 11pt",
-        "---",
+    # Build LaTeX document directly for precise layout control
+    latex_lines = [
+        r"\documentclass[11pt,letterpaper]{article}",
+        r"\usepackage[margin=0.75in]{geometry}",
+        r"\usepackage{graphicx}",
+        r"\usepackage{fancybox}",
+        r"\usepackage{xcolor}",
+        r"\usepackage{parskip}",
+        r"\usepackage[utf8]{inputenc}",
+        r"\usepackage[T1]{fontenc}",
+        r"\usepackage{lmodern}",
+        r"\usepackage{hyperref}",
+        r"\pagestyle{empty}",
+        r"\begin{document}",
         "",
-        f"**Generated**: {timestamp_label}  ",
-        f"**Products**: {len(products)} Cards & Tins  ",
-        "",
-        "\\newpage",
+        # Title page
+        r"\begin{center}",
+        r"{\Huge\bfseries Pokemon TCG Product Catalog}\\[0.5cm]",
+        r"{\Large Dollar General}\\[0.3cm]",
+        r"{\large Generated: " + timestamp_label + r"}\\[0.2cm]",
+        r"{\large " + str(len(products)) + r" Cards \& Tins}",
+        r"\end{center}",
+        r"\newpage",
         "",
     ]
 
@@ -255,66 +285,109 @@ def generate_catalog_pdf(products: list[dict]) -> Path | None:
         img_dest = IMAGES_DIR / f"product_{i}_{sku}.jpg"
         img_path = download_image(prod.get("image_url"), img_dest)
         if not img_path:
-            img_path = make_placeholder(IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30])
+            img_path = make_placeholder(
+                IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30]
+            )
 
         # Generate barcode
         bc_path = generate_barcode(sku, BARCODES_DIR)
 
-        # Relative paths for pandoc (run from OUTPUT_DIR)
-        rel_img = os.path.relpath(img_path, OUTPUT_DIR)
-        rel_bc = os.path.relpath(bc_path, OUTPUT_DIR) if bc_path else None
+        # Escape LaTeX special characters in text fields
+        safe_title = (
+            title.replace("&", r"\&")
+            .replace("%", r"\%")
+            .replace("$", r"\$")
+            .replace("#", r"\#")
+            .replace("_", r"\_")
+            .replace("é", r"\'e")
+        )
+        safe_stock = stock.replace("&", r"\&")
+        safe_price = price.replace("$", r"\$")
 
-        md_lines += [
-            f"## {i}. {title}",
+        # Absolute paths for LaTeX
+        abs_img = str(img_path.resolve())
+        abs_bc = str(bc_path.resolve()) if bc_path else None
+
+        latex_lines += [
+            r"\begin{center}",
+            # Product image — large, centered, with border
+            r"\fbox{\includegraphics[width=0.7\textwidth,height=0.45\textheight,keepaspectratio]{"
+            + abs_img
+            + r"}}",
+            r"\end{center}",
+            r"\vspace{0.4cm}",
             "",
-            f"![{title}]({rel_img}){{ width=200px }}",
+            # Name — bold, large
+            r"{\Large\bfseries " + safe_title + r"}",
             "",
-            "| Field | Value |",
-            "|-------|-------|",
-            f"| **Price** | {price} |",
-            f"| **Stock** | {stock} |",
-            f"| **SKU** | `{sku}` |",
-            f"| **UPC** | `{upc}` |",
+            r"\vspace{0.15cm}",
+            "",
+            # Stock and price
+            r"{\large " + safe_stock + r" \hfill " + safe_price + r"}",
+            "",
+            r"\vspace{0.5cm}",
             "",
         ]
 
-        if rel_bc:
-            md_lines += [
-                f"![UPC-A Barcode]({rel_bc}){{ width=250px }}",
+        # Barcode — centered, bordered
+        if abs_bc:
+            latex_lines += [
+                r"\begin{center}",
+                r"\fbox{\includegraphics[width=0.55\textwidth]{"
+                + abs_bc
+                + r"}}",
+                r"\end{center}",
+                r"\vspace{0.15cm}",
                 "",
             ]
 
-        md_lines += ["\\newpage", ""]
+        # SKU and UPC — small text
+        latex_lines += [
+            r"{\small SKU: \texttt{" + sku + r"}}\\",
+            r"{\small UPC: \texttt{" + upc + r"}}",
+            "",
+        ]
+
+        # Page break between products (not after last)
+        if i < len(products):
+            latex_lines.append(r"\newpage")
+            latex_lines.append("")
 
         print(f"   ✅ [{i}/{len(products)}] {title}")
 
-    # Write markdown
-    md_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.md"
-    md_file.write_text("\n".join(md_lines), encoding="utf-8")
-    print(f"\n📝 Markdown: {md_file}")
+    latex_lines.append(r"\end{document}")
 
-    # Convert to PDF
+    # Write .tex file
+    tex_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.tex"
+    tex_file.write_text("\n".join(latex_lines), encoding="utf-8")
+    print(f"\n📝 LaTeX source: {tex_file}")
+
+    # Compile to PDF with pdflatex directly (pandoc strips images from raw .tex)
     pdf_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.pdf"
-    engines = ["pdflatex", "xelatex"]
 
-    for engine in engines:
+    for engine in ["pdflatex", "xelatex"]:
         try:
-            cmd = [
-                "pandoc", str(md_file),
-                "-o", str(pdf_file),
-                f"--pdf-engine={engine}",
-                "-V", "colorlinks=true",
-            ]
-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
-            if result.returncode == 0:
-                print(f"📄 PDF generated: {pdf_file}  ({pdf_file.stat().st_size // 1024} KB)")
+            result = subprocess.run(
+                [engine, "-interaction=nonstopmode",
+                 f"-output-directory={OUTPUT_DIR}", str(tex_file)],
+                capture_output=True, text=True, timeout=120,
+            )
+            if pdf_file.exists() and pdf_file.stat().st_size > 1000:
+                # Clean up LaTeX temp files
+                for ext in [".aux", ".log", ".out"]:
+                    tmp = pdf_file.with_suffix(ext)
+                    if tmp.exists():
+                        tmp.unlink()
+                print(
+                    f"📄 PDF generated: {pdf_file}  ({pdf_file.stat().st_size // 1024} KB)"
+                )
                 return pdf_file
-            else:
-                continue
+        except FileNotFoundError:
+            continue
         except Exception:
             continue
 
-    print(f"⚠ PDF generation failed. Markdown available at: {md_file}")
+    print(f"⚠ PDF generation failed. LaTeX source available at: {tex_file}")
     return None
 
 # ---------------------------------------------------------------------------