#!/usr/bin/env python3
"""
Pokemon Discovery - TCG Product Catalog PDF Generator
Generates PDF catalog with product images, details, and UPC-A barcodes
"""

import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path

import barcode
import requests
from barcode.writer import ImageWriter
from PIL import Image, ImageDraw, ImageFont

# Characters outside this set are replaced with "_" when a SKU is embedded in
# a file name, so the name is valid on every platform and cannot break the
# Markdown image link syntax (spaces, parentheses, slashes, ...).
_UNSAFE_FILENAME_CHARS = re.compile(r'[^A-Za-z0-9._-]')

# UPC-A only encodes ASCII digits; str.isdigit() would also accept characters
# such as superscripts or non-Latin digits that the encoder rejects.
_ASCII_DIGITS = frozenset('0123456789')


def _safe_filename_part(value):
    """Return *value* as a string that is safe to embed in a file name."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', str(value))
    return cleaned or 'unknown'


def _single_line(value):
    """Return *value* as text with all whitespace runs collapsed to one space."""
    return ' '.join(str(value).split())


def _md_cell(value):
    """Return *value* formatted so it cannot break a Markdown pipe-table row."""
    return _single_line(value).replace('|', '\\|')


class PokemonTCGCatalogGenerator:
    """Build a Markdown/PDF product catalog from a JSON list of products.

    Each product is expected to be a mapping that may contain the keys
    ``title``, ``price``, ``stock``, ``sku``, ``url`` and ``image_url``.
    All output is written below ``catalog_output/`` in the current
    working directory.
    """

    def __init__(self, json_file):
        """Create the output directories and load the product list.

        Args:
            json_file: Path to a JSON file whose top-level value is a list
                of product mappings.

        Raises:
            OSError: If the file cannot be read or a directory cannot be
                created.
            ValueError: If the file is not valid JSON or its top-level value
                is not a list.
        """
        self.json_file = json_file
        self.output_dir = Path("catalog_output")
        self.images_dir = self.output_dir / "images"
        self.barcodes_dir = self.output_dir / "barcodes"

        # Create output directories
        for directory in (self.output_dir, self.images_dir, self.barcodes_dir):
            directory.mkdir(parents=True, exist_ok=True)

        # Load product data (JSON is UTF-8; do not depend on the locale)
        with open(json_file, 'r', encoding='utf-8') as f:
            products = json.load(f)

        if not isinstance(products, list):
            raise ValueError(
                "expected a JSON list of products, got "
                f"{type(products).__name__}"
            )
        self.products = products

    def download_image(self, url, filename):
        """Download a product image into the images directory.

        Args:
            url: Image URL; a falsy value means "no image".
            filename: File name to store the image under.

        Returns:
            The path of the saved file, or ``None`` when *url* is empty or
            the download fails.
        """
        if not url:
            return None

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            filepath = self.images_dir / filename
            with open(filepath, 'wb') as f:
                f.write(response.content)

            return filepath
        except Exception as e:
            # Deliberately best-effort: one bad image must not abort the
            # whole catalog; the caller falls back to a placeholder.
            print(f"Failed to download image {url}: {e}")
            return None

    def generate_upc_barcode(self, sku):
        """Generate a UPC-A barcode image from a SKU.

        The ASCII digits of the SKU are left-padded with zeros, or truncated
        to the last 11 digits, to form the UPC-A payload; the barcode library
        appends the 12th (check) digit.

        Args:
            sku: Product SKU (string or number).

        Returns:
            The path of the generated PNG as a string, or ``None`` when the
            SKU contains no digits or rendering fails.
        """
        sku_text = str(sku)
        digits_only = ''.join(ch for ch in sku_text if ch in _ASCII_DIGITS)
        if not digits_only:
            # An all-zero barcode would be misleading, so emit none at all.
            print(
                f"Failed to generate barcode for SKU {sku}: "
                "SKU contains no digits"
            )
            return None

        # Pad with zeros at the start, or keep only the last 11 digits.
        upc_base = digits_only[-11:].zfill(11)

        # The writer appends the ".png" extension itself.
        barcode_stem = (
            self.barcodes_dir / f"barcode_{_safe_filename_part(sku_text)}"
        )

        try:
            upc_generator = barcode.get_barcode_class('upca')
            upc = upc_generator(upc_base, writer=ImageWriter())

            # save() returns the full file name including the extension, so
            # there is no need to guess it.
            return upc.save(str(barcode_stem), options={
                'module_width': 0.2,
                'module_height': 15.0,
                'quiet_zone': 6.5,
                'font_size': 10,
                'text_distance': 5.0,
                'background': 'white',
                'foreground': 'black'
            })
        except Exception as e:
            # Best-effort: a product without a barcode is still listed.
            print(f"Failed to generate barcode for SKU {sku}: {e}")
            return None

    def create_placeholder_image(self, width=300, height=200):
        """Create a placeholder image for products without a usable image.

        Args:
            width: Image width in pixels.
            height: Image height in pixels.

        Returns:
            The path of the saved ``placeholder.png`` in the images
            directory.
        """
        img = Image.new('RGB', (width, height), color='lightgray')
        draw = ImageDraw.Draw(img)

        # Try a system font first, then Arial, then Pillow's built-in font.
        font_candidates = (
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
            'arial.ttf',
        )
        for candidate in font_candidates:
            try:
                font = ImageFont.truetype(candidate, 24)
                break
            except OSError:
                # Font file missing or unreadable; try the next one.
                continue
        else:
            font = ImageFont.load_default()

        text = "No Image\nAvailable"

        # Centre each line horizontally; the lines are stacked 30px apart
        # around the vertical middle of the image.
        lines = text.split('\n')
        y_offset = height // 2 - (len(lines) * 30) // 2

        for line in lines:
            bbox = draw.textbbox((0, 0), line, font=font)
            text_width = bbox[2] - bbox[0]
            x_offset = (width - text_width) // 2
            draw.text((x_offset, y_offset), line, fill='darkgray', font=font)
            y_offset += 30

        placeholder_path = self.images_dir / "placeholder.png"
        img.save(placeholder_path)
        return placeholder_path

    def generate_markdown(self):
        """Generate the Markdown source of the catalog.

        Downloads every product image (falling back to a shared placeholder)
        and renders a barcode for every product that has a SKU. Image links
        in the result are relative to the output directory.

        Returns:
            The complete Markdown document as a string.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # The blank lines before each "---" matter: directly below a text
        # line, "---" would be parsed as a heading underline instead of a
        # horizontal rule.
        header_lines = [
            "---",
            'title: "Pokemon TCG Product Catalog"',
            f'subtitle: "Dollar General - Generated {timestamp}"',
            'author: "Automated Scraper"',
            f'date: "{timestamp}"',
            "geometry: margin=1in",
            "fontsize: 11pt",
            "documentclass: article",
            "---",
            "",
            "# Pokemon TCG Product Catalog",
            "",
            f"Generated on: {timestamp}",
            "",
            "Source: Dollar General",
            "",
            f"Total Products: {len(self.products)}",
            "",
            "---",
            "",
            "",
        ]
        parts = ["\n".join(header_lines)]

        # Built lazily and only once, however many products need it.
        placeholder_path = None
        total = len(self.products)

        for i, product in enumerate(self.products, 1):
            print(
                f"Processing product {i}/{total}: "
                f"{product.get('title', 'Unknown')}"
            )

            sku = product.get('sku')

            # Download product image
            image_path = None
            if product.get('image_url'):
                # `sku` may be missing, null or numeric in the JSON.
                sku_part = _safe_filename_part(sku if sku else 'unknown')
                filename = f"product_{i}_{sku_part}.jpg"
                image_path = self.download_image(
                    product.get('image_url'), filename
                )

            if not image_path:
                # Use placeholder
                if placeholder_path is None:
                    placeholder_path = self.create_placeholder_image()
                image_path = placeholder_path

            # Generate barcode
            barcode_path = None
            if sku:
                barcode_path = self.generate_upc_barcode(sku)

            # Add product section to markdown
            title = _single_line(product.get('title', 'Unknown Product'))
            parts.append(f"## {i}. {title}\n\n")

            # Product image (forward slashes so the link also works when the
            # script runs on Windows)
            if image_path:
                rel_image_path = Path(
                    os.path.relpath(image_path, self.output_dir)
                ).as_posix()
                parts.append(
                    f"![Product Image]({rel_image_path}){{width=300px}}\n\n"
                )

            # Product details in a table
            parts.append("| Field | Value |\n")
            parts.append("|-------|-------|\n")
            parts.append(
                f"| **Title** | {_md_cell(product.get('title', 'N/A'))} |\n"
            )
            parts.append(
                f"| **Price** | {_md_cell(product.get('price', 'N/A'))} |\n"
            )
            parts.append(
                f"| **Stock** | {_md_cell(product.get('stock', 'N/A'))} |\n"
            )
            parts.append(
                f"| **SKU** | `{_single_line(product.get('sku', 'N/A'))}` |\n"
            )
            parts.append(
                f"| **URL** | {_md_cell(product.get('url', 'N/A'))} |\n"
            )
            parts.append("\n")

            # Barcode
            if barcode_path:
                rel_barcode_path = Path(
                    os.path.relpath(barcode_path, self.output_dir)
                ).as_posix()
                parts.append("**UPC-A Barcode:**\n\n")
                parts.append(
                    f"![UPC-A Barcode]({rel_barcode_path}){{width=200px}}\n\n"
                )

            parts.append("---\n\n")

        return "".join(parts)

    def _run_pandoc(self, markdown_file, pdf_file, engine, extra_args):
        """Run pandoc from inside the output directory with one PDF engine.

        Raises:
            subprocess.CalledProcessError: If pandoc exits with an error.
            FileNotFoundError: If the pandoc executable is not installed.
        """
        # The image links in the Markdown are relative to the output
        # directory, and pandoc resolves resources against its working
        # directory, so it has to run from there.
        subprocess.run(
            [
                'pandoc',
                markdown_file.name,
                '-o', pdf_file.name,
                f'--pdf-engine={engine}',
                *extra_args,
            ],
            check=True,
            cwd=str(self.output_dir),
        )

    def generate_pdf(self):
        """Generate the PDF catalog using pandoc.

        Writes the Markdown source to the output directory, then converts it
        with xelatex, falling back to pdflatex if that fails.

        Returns:
            The path of the generated PDF, or ``None`` when pandoc is missing
            or both engines fail (the Markdown file is kept either way).
        """
        print("Generating markdown content...")
        markdown_content = self.generate_markdown()

        # Save markdown file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        markdown_file = self.output_dir / f"pokemon_tcg_catalog_{timestamp}.md"

        with open(markdown_file, 'w', encoding='utf-8') as f:
            f.write(markdown_content)

        print(f"Markdown saved to: {markdown_file}")

        # Generate PDF using pandoc
        pdf_file = self.output_dir / f"pokemon_tcg_catalog_{timestamp}.pdf"

        print("Converting to PDF using pandoc...")

        xelatex_args = [
            '-V', 'colorlinks=true',
            '-V', 'linkcolor=blue',
            '-V', 'filecolor=magenta',
            '-V', 'urlcolor=cyan',
            '--toc',
            '--toc-depth=2',
        ]

        try:
            try:
                self._run_pandoc(
                    markdown_file, pdf_file, 'xelatex', xelatex_args
                )
            except subprocess.CalledProcessError as e:
                print(f"Pandoc conversion failed: {e}")
                print("Trying with pdflatex instead...")
                self._run_pandoc(
                    markdown_file, pdf_file, 'pdflatex', ['--toc']
                )
        except subprocess.CalledProcessError as e2:
            print(f"PDF generation failed with both engines: {e2}")
            print(f"Markdown file available at: {markdown_file}")
            return None
        except FileNotFoundError:
            print(
                "Error: pandoc not found. "
                "Please install pandoc to generate PDF."
            )
            print(f"Markdown file available at: {markdown_file}")
            return None

        print(f"PDF generated successfully: {pdf_file}")
        return pdf_file


def main():
    """Command-line entry point: build the catalog for one JSON file."""
    if len(sys.argv) != 2:
        print("Usage: python3 pdf_generator.py <json_file>")
        print(
            "Example: python3 pdf_generator.py "
            "pokemon_tcg_products_20241221_143025.json"
        )
        sys.exit(1)

    json_file = sys.argv[1]

    if not os.path.exists(json_file):
        print(f"Error: JSON file '{json_file}' not found")
        sys.exit(1)

    try:
        generator = PokemonTCGCatalogGenerator(json_file)
    except (OSError, ValueError) as e:
        # Unreadable file, invalid JSON (JSONDecodeError is a ValueError)
        # or a top-level value that is not a list.
        print(f"Error: could not load '{json_file}': {e}")
        sys.exit(1)

    pdf_file = generator.generate_pdf()

    if pdf_file:
        print("\nCatalog generation completed!")
        print(f"PDF file: {pdf_file}")
        print(f"Output directory: {generator.output_dir}")
    else:
        print(
            "\nPDF generation failed, but markdown file is available in: "
            f"{generator.output_dir}"
        )


if __name__ == "__main__":
    main()