Initial commit: Pokemon Discovery - TCG product scraper and PDF catalog generator
- Comprehensive scraper for Dollar General Pokemon TCG products
- Professional PDF catalog generator with UPC-A barcodes
- Robust anti-bot handling with requests + Selenium fallback
- Automatic image downloading and barcode generation
- Unix-friendly timestamped filenames
- Virtual environment support and dependency management
- Complete documentation and usage guides
This commit is contained in:
139
run_scraper.py
Executable file
139
run_scraper.py
Executable file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pokemon Discovery - Scraper and Catalog Generator
|
||||
Main script that runs both scraping and PDF generation
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
def install_requirements():
    """Install the packages listed in ``requirements.txt`` with pip.

    pip is invoked as a module of the running interpreter
    (``sys.executable -m pip``) and reads ``requirements.txt`` relative to
    the current working directory.

    Returns:
        True when pip exits with status 0, False when it exits non-zero.
    """
    print("Installing Python requirements...")

    pip_command = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']
    try:
        subprocess.run(pip_command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Failed to install requirements: {err}")
        return False
    else:
        print("Requirements installed successfully!")
        return True
|
||||
|
||||
def run_scraper():
    """Run ``scraper.py`` in a subprocess and locate the JSON file it wrote.

    The scraper is started with the current interpreter from the current
    working directory; its output is captured and echoed once it exits.
    Only ``pokemon_tcg_products_*.json`` files that are new, or whose
    modification time changed during this run, are considered. A leftover
    file from an earlier run is therefore never mistaken for fresh output.

    Returns:
        The path (as a string) of the most recently modified JSON file
        produced by this run, or ``None`` if the scraper exited non-zero,
        could not be run, or produced no JSON file.
    """
    print("=" * 60)
    print("STEP 1: SCRAPING POKEMON TCG PRODUCTS")
    print("=" * 60)

    pattern = 'pokemon_tcg_products_*.json'

    try:
        # Snapshot modification times up front so that stale files from
        # previous runs can be told apart from what this run produces.
        before = {
            path: path.stat().st_mtime_ns
            for path in Path('.').glob(pattern)
        }

        result = subprocess.run([sys.executable, 'scraper.py'],
                                capture_output=True, text=True)

        if result.returncode != 0:
            print("Scraping failed:")
            print(result.stderr)
            return None

        print("Scraping completed successfully!")
        print(result.stdout)

        # Keep only files that did not exist before or were rewritten.
        fresh = []
        for path in Path('.').glob(pattern):
            mtime = path.stat().st_mtime_ns
            if before.get(path) != mtime:
                fresh.append((mtime, path))

        if not fresh:
            print("No JSON file was generated")
            return None

        _, latest_file = max(fresh, key=lambda item: item[0])
        return str(latest_file)

    except Exception as e:
        # Step boundary: callers rely on a None result rather than an
        # exception, so any failure is reported and turned into None.
        print(f"Error running scraper: {e}")
        return None
|
||||
|
||||
def run_pdf_generator(json_file):
    """Run ``pdf_generator.py`` on the scraped data file.

    The generator is launched with the current interpreter and receives
    ``json_file`` as its only argument; its output is captured and echoed
    after it exits.

    Args:
        json_file: Path of the JSON file to hand to the PDF generator.

    Returns:
        True if the generator exited with status 0, otherwise False
        (including when launching it raised an exception).
    """
    banner = "=" * 60
    print(banner)
    print("STEP 2: GENERATING PDF CATALOG")
    print(banner)

    command = [sys.executable, 'pdf_generator.py', json_file]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)

        # Handle the failure case first so the success path reads straight.
        if completed.returncode != 0:
            print("PDF generation failed:")
            print(completed.stderr)
            return False

        print("PDF generation completed successfully!")
        print(completed.stdout)
        return True
    except Exception as err:
        print(f"Error running PDF generator: {err}")
        return False
|
||||
|
||||
def main():
    """Run the whole pipeline: dependency checks, scraping, PDF generation.

    Exits with status 1 when the required packages cannot be installed or
    when scraping yields no JSON file. A failed PDF step is reported as a
    partial success and does not trigger an explicit exit.
    """
    divider = "=" * 60

    print("Pokemon Discovery - Product Scraper & Catalog Generator")
    print(divider)
    print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    # Probe the third-party packages; fall back to installing them.
    try:
        import requests
        import bs4
        import barcode
        import PIL
        print("✓ Required packages are available")
    except ImportError as missing:
        print(f"✗ Missing required package: {missing}")
        print("Installing requirements...")
        installed = install_requirements()
        if not installed:
            sys.exit(1)

    # Pandoc is optional here: its absence only produces a warning.
    try:
        subprocess.run(['pandoc', '--version'],
                       capture_output=True, check=True)
        print("✓ Pandoc is available for PDF generation")
    except (subprocess.CalledProcessError, FileNotFoundError):
        pandoc_hints = (
            "⚠ Pandoc not found. PDF generation may fail.",
            " Install pandoc with: sudo apt install pandoc (Ubuntu/Debian)",
            " or: brew install pandoc (macOS)",
            " or: pacman -S pandoc (Arch Linux)",
        )
        for hint in pandoc_hints:
            print(hint)

    print()

    # Step 1: scrape. Without a JSON file there is nothing to render.
    json_file = run_scraper()
    if not json_file:
        print("Scraping failed. Exiting.")
        sys.exit(1)

    # Step 2: render the catalog; report a partial result if that fails.
    if not run_pdf_generator(json_file):
        print(divider)
        print("PARTIAL SUCCESS: Scraping completed, but PDF generation failed.")
        print(divider)
        print(f"JSON data: {json_file}")
        print("You can manually run the PDF generator with:")
        print(f" python3 pdf_generator.py {json_file}")
        return

    print(divider)
    print("SUCCESS! Both scraping and PDF generation completed.")
    print(divider)
    print(f"JSON data: {json_file}")
    print("PDF catalog: Check the catalog_output/ directory")
    print()
    print("Files generated:")

    # Show the newest match for each kind of output file.
    output_patterns = (
        'pokemon_tcg_products_*.json',
        'catalog_output/pokemon_tcg_catalog_*.pdf',
    )
    for output_pattern in output_patterns:
        matches = list(Path('.').glob(output_pattern))
        if not matches:
            continue
        newest = max(matches, key=os.path.getctime)
        print(f" - {newest}")
|
||||
|
||||
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not when imported.
    main()
|
||||
Reference in New Issue
Block a user