Files
pokemon-disco/pdf_generator.py
pi-bot-01 c3691a474e Fix barcode generation and add comprehensive test results
- Fixed double .png extension issue in barcode generation
- Added test data file for demonstrating functionality
- Updated gitignore to allow test data while excluding output files
- Comprehensive testing of PDF generation pipeline
- All core features working: barcode generation, PDF creation, data processing
- Added detailed test results documentation

Test summary:
- Virtual environment setup
- Python dependencies installation
- UPC-A barcode generation (3-6KB PNG files)
- Professional PDF catalog generation (161KB output)
- Markdown formatting and file organization
- Error handling and fallbacks
2026-03-21 14:46:40 -07:00

279 lines
9.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Pokemon Discovery - TCG Product Catalog PDF Generator
Generates PDF catalog with product images, details, and UPC-A barcodes
"""
import json
import os
import sys
import requests
import subprocess
from datetime import datetime
from pathlib import Path
import barcode
from barcode.writer import ImageWriter
from PIL import Image, ImageDraw, ImageFont
import tempfile
import shutil
class PokemonTCGCatalogGenerator:
    """Build a Markdown/PDF catalog from a JSON list of product records.

    For every product the generator downloads the product image (falling
    back to a generated placeholder), renders a UPC-A barcode derived from
    the SKU, and emits a Markdown section. The Markdown is then converted
    to PDF by invoking pandoc.

    Product records are read with ``dict.get`` using the keys ``title``,
    ``price``, ``stock``, ``sku``, ``url`` and ``image_url``; all of them
    are optional.
    """

    # Number of data digits handed to the UPC-A generator; the 12th digit
    # of the printed code is the check digit.
    UPC_DATA_DIGITS = 11

    def __init__(self, json_file, output_dir="catalog_output"):
        """Create the output directories and load the product data.

        Args:
            json_file: Path of the JSON file holding the product records.
            output_dir: Directory that receives the Markdown, PDF, images
                and barcodes. Defaults to ``catalog_output``.

        Raises:
            OSError: If the JSON file or the directories cannot be accessed.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self.json_file = json_file
        self.output_dir = Path(output_dir)
        self.images_dir = self.output_dir / "images"
        self.barcodes_dir = self.output_dir / "barcodes"

        # parents=True so a nested output_dir works on the first run.
        for directory in (self.output_dir, self.images_dir, self.barcodes_dir):
            directory.mkdir(parents=True, exist_ok=True)

        # Explicit encoding: the platform default is not always UTF-8.
        with open(json_file, 'r', encoding='utf-8') as f:
            self.products = json.load(f)

    @staticmethod
    def _safe_name(sku):
        """Return *sku* as text that can be embedded in a file name.

        Accepts non-string SKUs (JSON numbers) by converting them first.
        """
        return str(sku).replace('/', '_').replace(' ', '_')

    @classmethod
    def _upc_base(cls, sku):
        """Return the 11 UPC-A data digits for *sku*, or None without digits.

        Non-digit characters are dropped; short values are left-padded with
        zeros and long values keep their last 11 digits.
        """
        # Only ASCII digits: str.isdigit() also accepts characters such as
        # superscripts, which are not valid barcode digits.
        digits_only = ''.join(ch for ch in str(sku) if ch in '0123456789')
        if not digits_only:
            return None
        return digits_only[-cls.UPC_DATA_DIGITS:].zfill(cls.UPC_DATA_DIGITS)

    @staticmethod
    def _table_cell(value):
        """Return *value* as text that stays inside one Markdown table cell."""
        text = str(value).replace('\r', ' ').replace('\n', ' ')
        return text.replace('|', '\\|')

    def _rel_link(self, path):
        """Return *path* relative to the output directory, with '/' separators."""
        return Path(os.path.relpath(path, self.output_dir)).as_posix()

    def download_image(self, url, filename):
        """Download a product image into the images directory.

        Args:
            url: Image URL; a falsy value skips the download.
            filename: File name to store the image under.

        Returns:
            The path of the saved file, or None if there was no URL or the
            download/write failed (the failure is printed).
        """
        if not url:
            return None
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            filepath = self.images_dir / filename
            with open(filepath, 'wb') as f:
                f.write(response.content)
            return filepath
        except Exception as e:
            # Deliberate best-effort boundary: one bad image must not abort
            # the whole catalog; the caller falls back to a placeholder.
            print(f"Failed to download image {url}: {e}")
            return None

    def generate_upc_barcode(self, sku):
        """Render a UPC-A barcode PNG for *sku* in the barcodes directory.

        Args:
            sku: Product SKU (string or number). Its digits become the
                barcode data.

        Returns:
            The path of the written PNG as a string, or None if the SKU has
            no digits or rendering failed (the failure is printed).
        """
        upc_base = self._upc_base(sku)
        if upc_base is None:
            # An all-zero barcode would be misleading, so emit none at all.
            print(f"Failed to generate barcode for SKU {sku}: SKU contains no digits")
            return None
        try:
            upc_generator = barcode.get_barcode_class('upca')
            upc = upc_generator(upc_base, writer=ImageWriter())

            # No extension here: the writer appends ".png" itself.
            barcode_path = self.barcodes_dir / f"barcode_{self._safe_name(sku)}"
            saved_path = upc.save(str(barcode_path), options={
                'module_width': 0.2,
                'module_height': 15.0,
                'quiet_zone': 6.5,
                'font_size': 10,
                'text_distance': 5.0,
                'background': 'white',
                'foreground': 'black'
            })
            # save() reports the full file name; keep the computed name as
            # a fallback in case a writer returns nothing.
            return saved_path or f"{barcode_path}.png"
        except Exception as e:
            # Best-effort: a product without a barcode is still listed.
            print(f"Failed to generate barcode for SKU {sku}: {e}")
            return None

    def create_placeholder_image(self, width=300, height=200):
        """Write a gray "No Image Available" picture and return its path.

        Args:
            width: Image width in pixels.
            height: Image height in pixels.

        Returns:
            Path of ``placeholder.png`` inside the images directory.
        """
        img = Image.new('RGB', (width, height), color='lightgray')
        draw = ImageDraw.Draw(img)

        # Try known TrueType fonts first, then Pillow's built-in font.
        font = None
        for font_name in ('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 'arial.ttf'):
            try:
                font = ImageFont.truetype(font_name, 24)
                break
            except (OSError, ImportError):
                # Font file missing/unreadable, or FreeType support absent.
                continue
        if font is None:
            font = ImageFont.load_default()

        lines = "No Image\nAvailable".split('\n')
        line_height = 30
        y_offset = height // 2 - (len(lines) * line_height) // 2
        for line in lines:
            # Center each line horizontally using its rendered width.
            bbox = draw.textbbox((0, 0), line, font=font)
            text_width = bbox[2] - bbox[0]
            x_offset = (width - text_width) // 2
            draw.text((x_offset, y_offset), line, fill='darkgray', font=font)
            y_offset += line_height

        placeholder_path = self.images_dir / "placeholder.png"
        img.save(placeholder_path)
        return placeholder_path

    def _render_product(self, index, product, image_path, barcode_path):
        """Return the Markdown section for one product.

        Args:
            index: 1-based position of the product in the catalog.
            product: The product record (a dict).
            image_path: Path of the image to embed, or a falsy value.
            barcode_path: Path of the barcode to embed, or a falsy value.
        """
        cell = self._table_cell
        lines = [f"## {index}. {product.get('title', 'Unknown Product')}", ""]

        if image_path:
            lines += [f"![Product Image]({self._rel_link(image_path)}){{width=300px}}", ""]

        rows = (
            ("Title", cell(product.get('title', 'N/A'))),
            ("Price", cell(product.get('price', 'N/A'))),
            ("Stock", cell(product.get('stock', 'N/A'))),
            ("SKU", f"`{cell(product.get('sku', 'N/A'))}`"),
            ("URL", cell(product.get('url', 'N/A'))),
        )
        lines += ["| Field | Value |", "|-------|-------|"]
        lines.extend(f"| **{label}** | {value} |" for label, value in rows)
        lines.append("")

        if barcode_path:
            lines += [
                "**UPC-A Barcode:**",
                "",
                f"![UPC-A Barcode]({self._rel_link(barcode_path)}){{width=200px}}",
                "",
            ]

        lines += ["---", ""]
        return "\n".join(lines) + "\n"

    def generate_markdown(self):
        """Generate the Markdown source of the catalog.

        Side effects: downloads product images, writes barcode and
        placeholder images, and prints one progress line per product.

        Returns:
            The complete Markdown document as a string.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        total = len(self.products)

        # Blank lines separate the intro paragraphs; without them the line
        # before the closing '---' would be read as a heading.
        parts = [
            "---\n"
            'title: "Pokemon TCG Product Catalog"\n'
            f'subtitle: "Dollar General - Generated {timestamp}"\n'
            'author: "Automated Scraper"\n'
            f'date: "{timestamp}"\n'
            "geometry: margin=1in\n"
            "fontsize: 11pt\n"
            "documentclass: article\n"
            "---\n\n"
            "# Pokemon TCG Product Catalog\n\n"
            f"Generated on: {timestamp}\n\n"
            "Source: Dollar General\n\n"
            f"Total Products: {total}\n\n"
            "---\n\n"
        ]

        placeholder_path = None  # created lazily, shared by all products
        for i, product in enumerate(self.products, 1):
            print(f"Processing product {i}/{total}: {product.get('title', 'Unknown')}")
            sku = product.get('sku')

            image_path = None
            if product.get('image_url'):
                name_part = self._safe_name('unknown' if sku is None else sku)
                filename = f"product_{i}_{name_part}.jpg"
                image_path = self.download_image(product.get('image_url'), filename)
            if not image_path:
                if placeholder_path is None:
                    placeholder_path = self.create_placeholder_image()
                image_path = placeholder_path

            barcode_path = self.generate_upc_barcode(sku) if sku else None
            parts.append(self._render_product(i, product, image_path, barcode_path))

        return "".join(parts)

    def _run_pandoc(self, markdown_file, pdf_file, engine, extra_args):
        """Invoke pandoc once with the given PDF engine.

        Raises:
            subprocess.CalledProcessError: pandoc exited with an error.
            FileNotFoundError: the pandoc executable is not installed.
        """
        # Image links in the Markdown are relative to output_dir, while
        # pandoc searches the working directory by default; list both.
        resource_path = f".{os.pathsep}{self.output_dir}"
        command = [
            'pandoc',
            str(markdown_file),
            '-o', str(pdf_file),
            f'--pdf-engine={engine}',
            f'--resource-path={resource_path}',
        ]
        subprocess.run(command + list(extra_args), check=True)

    def generate_pdf(self):
        """Generate the catalog: write the Markdown, then convert it to PDF.

        xelatex is tried first and pdflatex second. The Markdown file is
        kept in the output directory in every case.

        Returns:
            Path of the generated PDF, or None if pandoc is missing or both
            engines failed.
        """
        print("Generating markdown content...")
        markdown_content = self.generate_markdown()

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        markdown_file = self.output_dir / f"pokemon_tcg_catalog_{timestamp}.md"
        with open(markdown_file, 'w', encoding='utf-8') as f:
            f.write(markdown_content)
        print(f"Markdown saved to: {markdown_file}")

        pdf_file = self.output_dir / f"pokemon_tcg_catalog_{timestamp}.pdf"
        print("Converting to PDF using pandoc...")
        try:
            try:
                self._run_pandoc(markdown_file, pdf_file, 'xelatex', [
                    '-V', 'colorlinks=true',
                    '-V', 'linkcolor=blue',
                    '-V', 'filecolor=magenta',
                    '-V', 'urlcolor=cyan',
                    '--toc',
                    '--toc-depth=2',
                ])
            except subprocess.CalledProcessError as e:
                print(f"Pandoc conversion failed: {e}")
                print("Trying with pdflatex instead...")
                self._run_pandoc(markdown_file, pdf_file, 'pdflatex', ['--toc'])
        except subprocess.CalledProcessError as e2:
            print(f"PDF generation failed with both engines: {e2}")
            print(f"Markdown file available at: {markdown_file}")
            return None
        except FileNotFoundError:
            print("Error: pandoc not found. Please install pandoc to generate PDF.")
            print(f"Markdown file available at: {markdown_file}")
            return None

        print(f"PDF generated successfully: {pdf_file}")
        return pdf_file
def main():
    """Command-line entry point: build the catalog from one JSON file.

    Expects exactly one argument, the path of the product JSON file. Prints
    a usage message or an error and exits with status 1 when the argument
    is missing or the file does not exist. Otherwise runs the generator and
    reports where the PDF (or, on failure, the Markdown) was written.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: python3 pdf_generator.py <json_file>")
        print("Example: python3 pdf_generator.py pokemon_tcg_products_20241221_143025.json")
        sys.exit(1)

    (source_path,) = arguments
    if not os.path.exists(source_path):
        print(f"Error: JSON file '{source_path}' not found")
        sys.exit(1)

    catalog = PokemonTCGCatalogGenerator(source_path)
    result = catalog.generate_pdf()

    # Handle the failure case first so the success report reads straight through.
    if not result:
        print(f"\nPDF generation failed, but markdown file is available in: {catalog.output_dir}")
        return

    print("\nCatalog generation completed!")
    print(f"PDF file: {result}")
    print(f"Output directory: {catalog.output_dir}")


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()