#!/usr/bin/env python3
"""Test dynamic content loading for Pokemon Discovery.

Diagnostic script that probes dollargeneral.com in three ways and prints
what it finds:

1. Requests a handful of guessed JSON API endpoints.
2. Downloads the Pokemon category page and scans its inline ``<script>``
   blocks for strings that look like API/search endpoints.
3. Checks ``sitemap.xml`` and ``robots.txt`` for Pokemon or product URLs.

All results are written to stdout; nothing is persisted.
"""

import json
import re
import time
from typing import List

import requests
from bs4 import BeautifulSoup

# Patterns used to spot endpoint-like strings inside inline JavaScript.
# Compiled once at import time instead of once per <script> tag.
# The first four patterns capture the URL in group 1; the last two have no
# group, so ``findall`` yields the whole match.
_API_URL_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'(?:api|Api|API)["\'\s]*[:=]["\'\s]*([^"\']+)',
        r'(?:endpoint|url|baseURL)["\'\s]*[:=]["\'\s]*([^"\']+)',
        r'fetch\s*\(\s*["\']([^"\']+)["\']',
        r'xhr\.open\s*\(\s*["\'][^"\']*["\'],\s*["\']([^"\']+)["\']',
        # NOTE: these two previously used ``\\s`` inside the raw string, which
        # excludes a literal backslash and the letter "s" rather than
        # whitespace, so paths were truncated at the first "s".
        r'/api/[^"\'\s]+',
        r'/search[^"\'\s]*',
    )
)


def _extract_api_candidates(script_text: str) -> List[str]:
    """Return endpoint-like strings found in one block of JavaScript.

    A match is kept only if it mentions ``dollargeneral`` or is a
    site-relative path (starts with ``/``). Matches are returned in the
    order found and may contain duplicates.
    """
    candidates = []
    for pattern in _API_URL_PATTERNS:
        for match in pattern.findall(script_text):
            if 'dollargeneral' in match or match.startswith('/'):
                candidates.append(match)
    return candidates


def test_api_endpoints() -> None:
    """Try to find API endpoints that might return product data.

    Issues a GET to each guessed endpoint and prints the HTTP status. For a
    200 response it reports the size of the JSON (or raw text) body and
    whether the JSON mentions 'products' or 'pokemon'.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.dollargeneral.com/c/toys/pokemon',
    }

    # Test potential API endpoints
    api_tests = [
        'https://www.dollargeneral.com/api/products/search?q=pokemon',
        'https://www.dollargeneral.com/api/v1/products?category=toys&query=pokemon',
        'https://www.dollargeneral.com/dg/search?q=pokemon&category=toys',
        'https://www.dollargeneral.com/api/search?term=pokemon+trading+card',
    ]

    print("=== Testing API Endpoints ===")

    for url in api_tests:
        print(f"Testing: {url}")
        try:
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            print(f" Error: {e}")
            print()
            continue

        print(f" Status: {response.status_code}")

        if response.status_code == 200:
            try:
                data = response.json()
            except ValueError:
                # Body is not JSON (requests' JSON decode errors subclass
                # ValueError); report the raw text size instead.
                print(f" Text Response: {len(response.text)} characters")
            else:
                rendered = str(data)
                print(f" JSON Response: {len(rendered)} characters")
                lowered = rendered.lower()
                if 'products' in lowered:
                    print(" ✓ Contains 'products'")
                if 'pokemon' in lowered:
                    print(" ✓ Contains 'pokemon'")
        print()


def test_network_requests() -> List[str]:
    """Analyze the search page to find AJAX calls.

    Downloads the Pokemon category page, scans every inline ``<script>``
    for endpoint-like strings and prints up to ten of them.

    Returns:
        The de-duplicated candidate endpoints in first-seen order, or an
        empty list if the page could not be fetched or parsed.
    """
    url = 'https://www.dollargeneral.com/c/toys/pokemon?q=&soldAtStore=true'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    print("=== Analyzing Search Page for API Calls ===")

    # Deliberately broad catch: this is a best-effort probe and a failure in
    # either the fetch or the HTML parse should be reported, not crash the run.
    try:
        response = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Look for API endpoints in JavaScript
        api_patterns = []
        for script in soup.find_all('script'):
            # ``script.string`` is falsy for external or empty scripts.
            if script.string:
                api_patterns.extend(_extract_api_candidates(script.string))

        # Remove duplicates while keeping first-seen order, so the
        # "first 10" shown below is stable from run to run.
        unique_apis = list(dict.fromkeys(api_patterns))

        print(f"Found {len(unique_apis)} potential API endpoints:")
        for api in unique_apis[:10]:  # Show first 10
            print(f" -> {api}")

        return unique_apis

    except Exception as e:
        print(f"Error analyzing page: {e}")
        return []


def test_sitemap_approach() -> None:
    """Try to find products via sitemap.

    Fetches ``sitemap.xml`` and ``robots.txt`` and prints, for each 200
    response, whether the body mentions Pokemon or contains ``/p/`` product
    URLs, plus the body length.
    """
    print("=== Testing Sitemap Approach ===")

    sitemap_urls = [
        'https://www.dollargeneral.com/sitemap.xml',
        'https://www.dollargeneral.com/robots.txt',
    ]

    for url in sitemap_urls:
        print(f"Testing: {url}")
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as e:
            print(f" Error: {e}")
            print()
            continue

        print(f" Status: {response.status_code}")

        if response.status_code == 200:
            content = response.text
            if 'pokemon' in content.lower():
                print(" ✓ Contains Pokemon references")
            if '/p/' in content:
                print(" ✓ Contains product URLs (/p/)")
            print(f" Content length: {len(content)} characters")
        print()


def main() -> None:
    """Run every probe in sequence and print a fixed summary."""
    print("Pokemon Discovery - Dynamic Content Testing")
    print("=" * 60)
    print()

    # Test various approaches to find products
    test_api_endpoints()
    print()
    test_network_requests()
    print()
    test_sitemap_approach()
    print()

    print("=" * 60)
    print("Summary:")
    print("- Individual product extraction: ✅ WORKING")
    print("- Product URLs can be processed if found")
    print("- Main challenge: Finding product URLs from search page")
    print("- Dynamic content requires browser automation or API discovery")


if __name__ == "__main__":
    main()