#!/usr/bin/env python3
"""
Analyze HAR file to find product loading endpoints

Usage:
    python <this script> [file.har ...]

When no paths are given on the command line, the built-in default capture
listed in DEFAULT_HAR_FILES is analyzed.
"""

import base64
import json
import sys
from urllib.parse import urlparse, parse_qs

# Path fragments that mark a request as a likely API call.
API_PATH_KEYWORDS = ('/api/', '/search', '/products', '/inventory', '/catalog')

# Request headers worth echoing when replaying a call by hand.
IMPORTANT_HEADERS = frozenset(
    {'accept', 'content-type', 'authorization', 'x-api-key', 'referer'}
)

# JSON keys whose list values are reported as potential product arrays.
PRODUCT_ARRAY_KEYS = frozenset({'products', 'items', 'results', 'data'})

# Identifiers of the known test product searched for in response bodies.
TEST_PRODUCT_SKU = '41936301'
TEST_PRODUCT_UPC = '728192558375'

# Capture analyzed when the script is run without arguments.
DEFAULT_HAR_FILES = ['www.dollargeneral.com_Archive [26-03-21 15-14-28].har']

# How many entries of each kind are printed.
MAX_API_CALLS_SHOWN = 20
MAX_PRODUCT_CALLS_SHOWN = 5


def _is_json_response(response):
    """Return True when a Content-Type response header mentions JSON."""
    for header in response.get('headers', []):
        if (header.get('name') or '').lower() != 'content-type':
            continue
        # Media types are case-insensitive, so compare in lowercase.
        if 'json' in (header.get('value') or '').lower():
            return True
    return False


def _classify_entries(entries):
    """Split HAR entries into (api_calls, product_calls)."""
    api_calls = []
    product_calls = []

    for entry in entries:
        request = entry.get('request', {})
        response = entry.get('response', {})
        url = request.get('url', '')

        parsed_url = urlparse(url)
        path = parsed_url.path.lower()
        query = parsed_url.query.lower()

        is_api = any(keyword in path for keyword in API_PATH_KEYWORDS)
        if not (is_api or _is_json_response(response)):
            continue

        contains_pokemon = 'pokemon' in query or 'pokemon' in path
        api_calls.append({
            'url': url,
            'method': request.get('method', ''),
            'status': response.get('status', 0),
            'is_pokemon': contains_pokemon,
            'response_size': response.get('bodySize', 0),
        })

        # Product calls are the subset of API calls that look product-related.
        if contains_pokemon or 'product' in path or 'search' in path:
            product_calls.append(entry)

    return api_calls, product_calls


def _print_api_calls(api_calls):
    """Print a one-line overview of the first API calls."""
    print("=== API CALLS ===")
    for call in api_calls[:MAX_API_CALLS_SHOWN]:
        pokemon_flag = "🎯" if call['is_pokemon'] else " "
        print(f"{pokemon_flag} {call['method']} {call['status']} - {call['url']}")
        if call['response_size'] > 1000:
            print(f" 📦 Response size: {call['response_size']} bytes")
    print()


def _find_products_in_json(obj, path=""):
    """Recursively collect (json_path, length) for product-like lists."""
    products = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            new_path = f"{path}.{key}" if path else key
            if key.lower() in PRODUCT_ARRAY_KEYS and isinstance(value, list):
                products.append((new_path, len(value)))
            products.extend(_find_products_in_json(value, new_path))
    elif isinstance(obj, list):
        for idx, item in enumerate(obj):
            products.extend(_find_products_in_json(item, f"{path}[{idx}]"))
    return products


def _response_text(response):
    """Return the response body text, decoding base64 content if flagged."""
    content = response.get('content', {})
    text = content.get('text') or ''
    if text and content.get('encoding') == 'base64':
        # HAR stores binary or non-UTF-8 bodies base64-encoded; decode them so
        # JSON payloads saved this way can still be inspected.
        try:
            return base64.b64decode(text).decode('utf-8', errors='replace')
        except ValueError:
            # Malformed base64: fall back to the raw text.
            return text
    return text


def _report_response_body(response_text):
    """Print what can be learned from a non-empty response body."""
    print(f"Response size: {len(response_text)} characters")

    try:
        response_json = json.loads(response_text)
    except json.JSONDecodeError:
        print("Response is not JSON")
        # The SKU may still appear in HTML or other text bodies.
        if TEST_PRODUCT_SKU in response_text:
            print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
        return

    print("✓ Valid JSON response")

    product_arrays = _find_products_in_json(response_json)
    if product_arrays:
        print("Potential product arrays found:")
        for path, count in product_arrays:
            print(f" {path}: {count} items")

    # Search the stringified payload so nested values are matched too.
    response_str = str(response_json).lower()
    if TEST_PRODUCT_SKU in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
    if TEST_PRODUCT_UPC in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT UPC!")
    if 'pokemon' in response_str:
        print("🎯 CONTAINS POKEMON REFERENCES!")


def _print_product_call(index, entry):
    """Print request and response details for one product-related entry."""
    request = entry.get('request', {})
    response = entry.get('response', {})
    url = request.get('url', '')

    print(f"\n--- Product Call {index + 1} ---")
    print(f"URL: {url}")
    print(f"Method: {request.get('method', '')}")
    print(f"Status: {response.get('status', 0)}")

    important_headers = [
        h for h in request.get('headers', [])
        if h.get('name', '').lower() in IMPORTANT_HEADERS
    ]
    if important_headers:
        print("Important Headers:")
        for header in important_headers:
            # Truncate long values such as bearer tokens.
            print(f" {header.get('name')}: {header.get('value', '')[:100]}")

    parsed = urlparse(url)
    if parsed.query:
        print("Query Parameters:")
        for key, values in parse_qs(parsed.query).items():
            print(f" {key}: {values}")

    post_text = request.get('postData', {}).get('text')
    if post_text:
        print(f"POST Data: {post_text[:200]}...")

    response_text = _response_text(response)
    if response_text:
        _report_response_body(response_text)


def analyze_har_file(har_file):
    """Analyze HAR file to find product-related API calls.

    Prints a report to stdout and never raises: any failure is reported on
    stdout and signalled by empty results.

    Args:
        har_file: Path of the HAR (JSON) file to read.

    Returns:
        A tuple ``(api_calls, product_calls)``. ``api_calls`` is a list of
        summary dicts with the keys ``url``, ``method``, ``status``,
        ``is_pokemon`` and ``response_size``. ``product_calls`` is the list
        of raw HAR entries that look product-related. Both are empty when
        the file is missing or cannot be analyzed.
    """
    print(f"Analyzing HAR file: {har_file}")

    try:
        # utf-8-sig also accepts files saved with a leading BOM.
        with open(har_file, 'r', encoding='utf-8-sig') as f:
            har_data = json.load(f)

        entries = har_data.get('log', {}).get('entries', [])
        print(f"Found {len(entries)} network requests")
        print()

        api_calls, product_calls = _classify_entries(entries)
        print(f"Found {len(api_calls)} potential API calls")
        print(f"Found {len(product_calls)} product-related calls")
        print()

        _print_api_calls(api_calls)

        if product_calls:
            print("=== DETAILED PRODUCT CALL ANALYSIS ===")
            for i, entry in enumerate(product_calls[:MAX_PRODUCT_CALLS_SHOWN]):
                _print_product_call(i, entry)

        return api_calls, product_calls

    except FileNotFoundError:
        print(f"HAR file not found: {har_file}")
        return [], []
    except Exception as e:
        # Script-level boundary: a malformed capture must not stop the
        # remaining files from being processed.
        print(f"Error analyzing HAR file: {e}")
        return [], []


def main(argv=None):
    """Analyze each HAR file named in *argv*, or the default capture."""
    args = sys.argv[1:] if argv is None else argv
    har_files = args or DEFAULT_HAR_FILES

    for har_file in har_files:
        # analyze_har_file reports its own errors and never raises.
        api_calls, product_calls = analyze_har_file(har_file)

        print("\n🎯 SUMMARY:")
        print(f" Total API calls: {len(api_calls)}")
        print(f" Product-related calls: {len(product_calls)}")

        if product_calls:
            print("\n💡 NEXT STEPS:")
            print(" 1. Test the identified API endpoints")
            print(" 2. Replicate the headers and parameters")
            print(" 3. Integrate successful calls into Pokemon Discovery")


if __name__ == "__main__":
    main()