From c0ec0f947ba3b5989358043ff4a9d6daca892212 Mon Sep 17 00:00:00 2001 From: pi-bot-01 Date: Sat, 21 Mar 2026 22:59:29 -0700 Subject: [PATCH] Match product.png layout: image, name, stock, barcode, SKU/UPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switched from pandoc markdown to direct LaTeX for precise layout control - Each product gets its own page matching the mockup: • Large bordered product image (centered) • Product name (bold, left) • Stock + price line • Bordered UPC-A barcode (centered) • SKU and UPC text (small, left) - Fixed WebP→PNG image conversion (DG CDN serves WebP as .jpg) - Compile directly with pdflatex (pandoc strips images from raw .tex) - Output: 5.6MB PDF, 7 pages, 6 products with real images and barcodes --- disco.py | 175 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 124 insertions(+), 51 deletions(-) diff --git a/disco.py b/disco.py index dc2e78d..71f1fcc 100644 --- a/disco.py +++ b/disco.py @@ -165,14 +165,18 @@ def enrich_from_product_page(product: dict) -> dict: # --------------------------------------------------------------------------- def download_image(url: str, dest: Path) -> Path | None: - """Download image from URL, return local path or None.""" + """Download image from URL, convert to PNG for LaTeX compatibility.""" if not url: return None try: resp = requests.get(url, headers=HEADERS, timeout=15) resp.raise_for_status() - dest.write_bytes(resp.content) - return dest + # Convert to PNG regardless of source format (handles WebP, etc.) 
+ from io import BytesIO + img = Image.open(BytesIO(resp.content)).convert("RGB") + png_dest = dest.with_suffix(".png") + img.save(png_dest, "PNG") + return png_dest except Exception as e: print(f" ⚠ Image download failed: {e}") return None @@ -225,22 +229,48 @@ def generate_barcode(sku: str, dest_dir: Path) -> Path | None: # --------------------------------------------------------------------------- def generate_catalog_pdf(products: list[dict]) -> Path | None: - """Build a Markdown file and convert to PDF with pandoc.""" + """Build a LaTeX file and compile it to PDF with pdflatex (falling back to xelatex). + + Layout per page (matching product.png mockup): + ┌─────────────────────┐ + │ │ + │ Product Image │ ← large, centered, bordered + │ │ + └─────────────────────┘ + Name ← product title, bold + Stk ← stock / price info + ┌─────────────────────┐ + │ UPC-A Barcode │ ← centered, bordered + └─────────────────────┘ + SKU: XXXXXXX ← small text + UPC: XXXXXXXXXXXX ← small text + """ timestamp_label = datetime.now().strftime("%Y-%m-%d %H:%M:%S") timestamp_file = datetime.now().strftime("%Y%m%d_%H%M%S") - md_lines = [ - "---", - 'title: "Pokemon TCG Product Catalog — Dollar General"', - f'date: "{timestamp_label}"', - "geometry: margin=0.75in", - "fontsize: 11pt", - "---", + # Build LaTeX document directly for precise layout control + latex_lines = [ + r"\documentclass[11pt,letterpaper]{article}", + r"\usepackage[margin=0.75in]{geometry}", + r"\usepackage{graphicx}", + r"\usepackage{fancybox}", + r"\usepackage{xcolor}", + r"\usepackage{parskip}", + r"\usepackage[utf8]{inputenc}", + r"\usepackage[T1]{fontenc}", + r"\usepackage{lmodern}", + r"\usepackage{hyperref}", + r"\pagestyle{empty}", + r"\begin{document}", "", - f"**Generated**: {timestamp_label} ", - f"**Products**: {len(products)} Cards & Tins ", - "", - "\\newpage", + # Title page + r"\begin{center}", + r"{\Huge\bfseries Pokemon TCG Product Catalog}\\[0.5cm]", + r"{\Large Dollar General}\\[0.3cm]", + r"{\large Generated: " + timestamp_label + 
r"}\\[0.2cm]", + r"{\large " + str(len(products)) + r" Cards \& Tins}", + r"\end{center}", + r"\newpage", "", ] @@ -255,66 +285,109 @@ def generate_catalog_pdf(products: list[dict]) -> Path | None: img_dest = IMAGES_DIR / f"product_{i}_{sku}.jpg" img_path = download_image(prod.get("image_url"), img_dest) if not img_path: - img_path = make_placeholder(IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30]) + img_path = make_placeholder( + IMAGES_DIR / f"product_{i}_{sku}_placeholder.png", title[:30] + ) # Generate barcode bc_path = generate_barcode(sku, BARCODES_DIR) - # Relative paths for pandoc (run from OUTPUT_DIR) - rel_img = os.path.relpath(img_path, OUTPUT_DIR) - rel_bc = os.path.relpath(bc_path, OUTPUT_DIR) if bc_path else None + # Escape LaTeX special characters in text fields + safe_title = ( + title.replace("&", r"\&") + .replace("%", r"\%") + .replace("$", r"\$") + .replace("#", r"\#") + .replace("_", r"\_") + .replace("é", r"\'e") + ) + safe_stock = stock.replace("&", r"\&") + safe_price = price.replace("$", r"\$") - md_lines += [ - f"## {i}. 
{title}", + # Absolute paths for LaTeX + abs_img = str(img_path.resolve()) + abs_bc = str(bc_path.resolve()) if bc_path else None + + latex_lines += [ + r"\begin{center}", + # Product image — large, centered, with border + r"\fbox{\includegraphics[width=0.7\textwidth,height=0.45\textheight,keepaspectratio]{" + + abs_img + + r"}}", + r"\end{center}", + r"\vspace{0.4cm}", "", - f"![{title}]({rel_img}){{ width=200px }}", + # Name — bold, large + r"{\Large\bfseries " + safe_title + r"}", "", - "| Field | Value |", - "|-------|-------|", - f"| **Price** | {price} |", - f"| **Stock** | {stock} |", - f"| **SKU** | `{sku}` |", - f"| **UPC** | `{upc}` |", + r"\vspace{0.15cm}", + "", + # Stock and price + r"{\large " + safe_stock + r" \hfill " + safe_price + r"}", + "", + r"\vspace{0.5cm}", "", ] - if rel_bc: - md_lines += [ - f"![UPC-A Barcode]({rel_bc}){{ width=250px }}", + # Barcode — centered, bordered + if abs_bc: + latex_lines += [ + r"\begin{center}", + r"\fbox{\includegraphics[width=0.55\textwidth]{" + + abs_bc + + r"}}", + r"\end{center}", + r"\vspace{0.15cm}", "", ] - md_lines += ["\\newpage", ""] + # SKU and UPC — small text + latex_lines += [ + r"{\small SKU: \texttt{" + sku + r"}}\\", + r"{\small UPC: \texttt{" + upc + r"}}", + "", + ] + + # Page break between products (not after last) + if i < len(products): + latex_lines.append(r"\newpage") + latex_lines.append("") print(f" ✅ [{i}/{len(products)}] {title}") - # Write markdown - md_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.md" - md_file.write_text("\n".join(md_lines), encoding="utf-8") - print(f"\n📝 Markdown: {md_file}") + latex_lines.append(r"\end{document}") - # Convert to PDF + # Write .tex file + tex_file = OUTPUT_DIR / f"pokemon_catalog_{timestamp_file}.tex" + tex_file.write_text("\n".join(latex_lines), encoding="utf-8") + print(f"\n📝 LaTeX source: {tex_file}") + + # Compile to PDF with pdflatex directly (pandoc strips images from raw .tex) pdf_file = OUTPUT_DIR / 
f"pokemon_catalog_{timestamp_file}.pdf" - engines = ["pdflatex", "xelatex"] - for engine in engines: + for engine in ["pdflatex", "xelatex"]: try: - cmd = [ - "pandoc", str(md_file), - "-o", str(pdf_file), - f"--pdf-engine={engine}", - "-V", "colorlinks=true", - ] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - if result.returncode == 0: - print(f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)") + result = subprocess.run( + [engine, "-interaction=nonstopmode", + f"-output-directory={OUTPUT_DIR}", str(tex_file)], + capture_output=True, text=True, timeout=120, + ) + if pdf_file.exists() and pdf_file.stat().st_size > 1000: + # Clean up LaTeX temp files + for ext in [".aux", ".log", ".out"]: + tmp = pdf_file.with_suffix(ext) + if tmp.exists(): + tmp.unlink() + print( + f"📄 PDF generated: {pdf_file} ({pdf_file.stat().st_size // 1024} KB)" + ) return pdf_file - else: - continue + except FileNotFoundError: + continue except Exception: continue - print(f"⚠ PDF generation failed. Markdown available at: {md_file}") + print(f"⚠ PDF generation failed. LaTeX source available at: {tex_file}") return None # ---------------------------------------------------------------------------