Files
pokemon-disco/run_scraper.py
pi-bot-01 e6dd999aeb Initial commit: Pokemon Discovery - TCG product scraper and PDF catalog generator
- Comprehensive scraper for Dollar General Pokemon TCG products
- Professional PDF catalog generator with UPC-A barcodes
- Robust anti-bot handling with requests + Selenium fallback
- Automatic image downloading and barcode generation
- Unix-friendly timestamped filenames
- Virtual environment support and dependency management
- Complete documentation and usage guides
2026-03-21 14:41:17 -07:00

139 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Pokemon Discovery - Scraper and Catalog Generator
Main script that runs both scraping and PDF generation
"""
import os
import sys
import subprocess
from datetime import datetime
from pathlib import Path
def install_requirements():
    """Install the project's dependencies with pip.

    Runs ``pip install -r requirements.txt`` through the interpreter that is
    executing this script; ``requirements.txt`` is resolved against the
    current working directory.

    Returns:
        True when pip exits successfully, False when it exits with a
        non-zero status.
    """
    print("Installing Python requirements...")

    # Invoke pip as a module of the running interpreter so the packages land
    # in the same environment this script is using.
    pip_command = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']

    try:
        subprocess.run(pip_command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Failed to install requirements: {err}")
        installed = False
    else:
        print("Requirements installed successfully!")
        installed = True
    return installed
def _scraper_output_signatures():
    """Map each pokemon_tcg_products_*.json in the cwd to (mtime_ns, size)."""
    signatures = {}
    for path in Path('.').glob('pokemon_tcg_products_*.json'):
        try:
            info = path.stat()
        except OSError:
            # The file vanished between glob() and stat(); ignore it.
            continue
        signatures[path] = (info.st_mtime_ns, info.st_size)
    return signatures


def run_scraper():
    """Run ``scraper.py`` and return the JSON file produced by this run.

    The scraper is launched with the current interpreter from the current
    working directory, and its output is captured and echoed once it
    finishes.

    Only files matching ``pokemon_tcg_products_*.json`` that were created or
    modified during this run are considered, so a leftover file from an
    earlier run is never returned as if it were fresh data.

    Returns:
        The path (as ``str``) of the most recently modified JSON file written
        by this run, or ``None`` if the scraper could not be started, exited
        with a non-zero status, or produced no JSON file.
    """
    print("=" * 60)
    print("STEP 1: SCRAPING POKEMON TCG PRODUCTS")
    print("=" * 60)

    # Snapshot existing output first so stale files can be told apart from
    # the ones this run writes.
    before = _scraper_output_signatures()

    try:
        result = subprocess.run([sys.executable, 'scraper.py'],
                                capture_output=True, text=True)
    except Exception as e:
        # Boundary handler: report any launch/decoding problem and signal
        # failure to the caller rather than crashing the pipeline.
        print(f"Error running scraper: {e}")
        return None

    if result.returncode != 0:
        print("Scraping failed:")
        print(result.stderr)
        return None

    print("Scraping completed successfully!")
    print(result.stdout)

    # Keep only files that are new or whose (mtime, size) changed.
    after = _scraper_output_signatures()
    fresh = [path for path, signature in after.items()
             if before.get(path) != signature]
    if not fresh:
        print("No JSON file was generated")
        return None

    latest_file = max(fresh, key=lambda path: after[path][0])
    return str(latest_file)
def run_pdf_generator(json_file):
    """Invoke ``pdf_generator.py`` on the scraped data file.

    The generator is launched with the current interpreter from the current
    working directory; its output is captured and printed after it exits.

    Args:
        json_file: Path of the JSON file, passed to the generator as its
            single command-line argument.

    Returns:
        True if the generator exited with status 0, False if it exited with
        any other status or an exception occurred while running it.
    """
    banner = "=" * 60
    print(banner)
    print("STEP 2: GENERATING PDF CATALOG")
    print(banner)

    command = [sys.executable, 'pdf_generator.py', json_file]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
        succeeded = completed.returncode == 0
        if not succeeded:
            print("PDF generation failed:")
            print(completed.stderr)
        else:
            print("PDF generation completed successfully!")
            print(completed.stdout)
        return succeeded
    except Exception as exc:
        print(f"Error running PDF generator: {exc}")
        return False
def main():
    """Run the whole pipeline: dependency check, scraping, PDF generation.

    Steps:
        1. Try to import the required third-party packages; if any import
           fails, install everything from ``requirements.txt``.
        2. Probe for ``pandoc`` and print a warning (without aborting) when
           it cannot be run.
        3. Run the scraper, then pass its JSON output to the PDF generator.

    Exits with status 1 (via ``sys.exit``) when the requirements cannot be
    installed, when scraping fails, or when PDF generation fails, so that
    shell pipelines and schedulers can detect every failure. Returns
    ``None`` when both steps succeed.
    """
    print("Pokemon Discovery - Product Scraper & Catalog Generator")
    print("=" * 60)
    print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    # Check if requirements are installed. The imports are only a probe:
    # the names are not used here, the work happens in the child scripts.
    try:
        import requests  # noqa: F401
        import bs4  # noqa: F401
        import barcode  # noqa: F401
        import PIL  # noqa: F401
        print("✓ Required packages are available")
    except ImportError as e:
        print(f"✗ Missing required package: {e}")
        print("Installing requirements...")
        if not install_requirements():
            sys.exit(1)

    # Check if pandoc is available. A missing pandoc is only a warning
    # because the scraping step is still useful on its own.
    try:
        subprocess.run(['pandoc', '--version'],
                       capture_output=True, check=True)
        print("✓ Pandoc is available for PDF generation")
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("⚠ Pandoc not found. PDF generation may fail.")
        print(" Install pandoc with: sudo apt install pandoc (Ubuntu/Debian)")
        print(" or: brew install pandoc (macOS)")
        print(" or: pacman -S pandoc (Arch Linux)")
    print()

    # Run scraper
    json_file = run_scraper()
    if not json_file:
        print("Scraping failed. Exiting.")
        sys.exit(1)

    # Run PDF generator
    if not run_pdf_generator(json_file):
        print("=" * 60)
        print("PARTIAL SUCCESS: Scraping completed, but PDF generation failed.")
        print("=" * 60)
        print(f"JSON data: {json_file}")
        print("You can manually run the PDF generator with:")
        print(f" python3 pdf_generator.py {json_file}")
        # The catalog was not produced, so report failure to the caller
        # instead of exiting with status 0.
        sys.exit(1)

    print("=" * 60)
    print("SUCCESS! Both scraping and PDF generation completed.")
    print("=" * 60)
    print(f"JSON data: {json_file}")
    print("PDF catalog: Check the catalog_output/ directory")
    print()
    print("Files generated:")
    # List the newest file matching each output pattern
    for file_pattern in ['pokemon_tcg_products_*.json',
                         'catalog_output/pokemon_tcg_catalog_*.pdf']:
        files = list(Path('.').glob(file_pattern))
        if files:
            latest = max(files, key=os.path.getctime)
            print(f" - {latest}")
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()