🎉 MAJOR BREAKTHROUGH: Dollar General API Endpoint Discovered!
✅ Successfully discovered the internal API via HAR analysis:
• Endpoint: https://dggo.dollargeneral.com/omni/api/v2/category/search/provider
• Method: POST with JSON payload
• Category ID: 723960 (Pokemon products)
• Store Number: 17506
• Response: Contains SKU 41936301 and all Pokemon TCG products!

🔬 HAR Analysis Tools Added:
• analyze_har.py - Extract API calls from HAR files
• extract_api_details.py - Detailed API request format extraction
• implement_api_scraper.py - Full API implementation framework
• test_api_scraper.py - API endpoint testing

📋 API Documentation:
• DISCOVERY_SUCCESS.md - Complete analysis and findings
• api_request_template.json - Exact request format
• scraper.py updated with API framework

🎯 KEY DISCOVERIES:
✅ Found the exact API endpoint used by the Dollar General website
✅ Documented the complete request/response format
✅ Confirmed presence of the test product (SKU 41936301)
✅ Identified the Pokemon category ID and store parameters
✅ Ready for bulk product scraping once auth is implemented

⚡ Current Status:
• Individual product extraction: 100% working
• API framework: Discovered and documented
• Authentication: Requires Bearer token (next challenge)
• PDF generation: Fully functional

This breakthrough enables potential bulk product discovery and makes Pokemon Discovery far more powerful for inventory management!
This commit is contained in:
181
analyze_har.py
Normal file
181
analyze_har.py
Normal file
@@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze HAR file to find product loading endpoints
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
# Path fragments that mark a request as a likely API call.
_API_PATH_KEYWORDS = ('/api/', '/search', '/products', '/inventory', '/catalog')

# JSON keys whose list values are reported as candidate product arrays.
_PRODUCT_LIST_KEYS = frozenset({'products', 'items', 'results', 'data'})

# Request headers worth echoing when replicating a call.
_IMPORTANT_HEADERS = frozenset(
    {'accept', 'content-type', 'authorization', 'x-api-key', 'referer'}
)

# Identifiers of the known test product searched for in response bodies.
_TEST_SKU = '41936301'
_TEST_UPC = '728192558375'


def _is_json_response(response):
    """Return True when a response header declares a JSON content type."""
    for header in response.get('headers', []):
        # Header names and media types are case-insensitive, so compare
        # both in lower case; `or ''` tolerates null names/values.
        name = (header.get('name') or '').lower()
        value = (header.get('value') or '').lower()
        if name == 'content-type' and 'json' in value:
            return True
    return False


def _response_size(response):
    """Return the response body size in bytes, or 0 when it is unknown."""
    body_size = response.get('bodySize', 0)
    if isinstance(body_size, int) and body_size >= 0:
        return body_size
    # HAR uses a negative bodySize for "not available"; fall back to the
    # size recorded on the content object instead of reporting -1.
    content_size = response.get('content', {}).get('size', 0)
    if isinstance(content_size, int) and content_size > 0:
        return content_size
    return 0


def _find_product_arrays(obj, path=""):
    """Recursively collect (path, length) for lists stored under product-like keys."""
    found = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            child_path = f"{path}.{key}" if path else key
            if key.lower() in _PRODUCT_LIST_KEYS and isinstance(value, list):
                found.append((child_path, len(value)))
            # Always descend: product arrays may be nested below any key.
            found.extend(_find_product_arrays(value, child_path))
    elif isinstance(obj, list):
        for idx, item in enumerate(obj):
            found.extend(_find_product_arrays(item, f"{path}[{idx}]"))
    return found


def _report_response_body(response_text):
    """Print what a response body contains: JSON validity, product arrays, test product hits."""
    print(f"Response size: {len(response_text)} characters")

    try:
        response_json = json.loads(response_text)
    except json.JSONDecodeError:
        print("Response is not JSON")
        # A non-JSON body (e.g. HTML) can still mention the test product.
        if _TEST_SKU in response_text:
            print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
        return

    print("✓ Valid JSON response")

    product_arrays = _find_product_arrays(response_json)
    if product_arrays:
        print("Potential product arrays found:")
        for path, count in product_arrays:
            print(f" {path}: {count} items")

    # Search the stringified document so the identifiers are found whether
    # they are stored as strings or as numbers, at any depth.
    response_str = str(response_json).lower()
    if _TEST_SKU in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
    if _TEST_UPC in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT UPC!")
    if 'pokemon' in response_str:
        print("🎯 CONTAINS POKEMON REFERENCES!")


def _print_product_call(number, entry):
    """Print the request details and response findings for one HAR entry."""
    request = entry.get('request', {})
    response = entry.get('response', {})

    print(f"\n--- Product Call {number} ---")
    print(f"URL: {request.get('url', '')}")
    print(f"Method: {request.get('method', '')}")
    print(f"Status: {response.get('status', 0)}")

    important_headers = [
        header for header in request.get('headers', [])
        if (header.get('name') or '').lower() in _IMPORTANT_HEADERS
    ]
    if important_headers:
        print("Important Headers:")
        for header in important_headers:
            # Values are cut at 100 characters to keep long tokens readable.
            print(f" {header.get('name')}: {(header.get('value') or '')[:100]}")

    parsed = urlparse(request.get('url', ''))
    if parsed.query:
        print("Query Parameters:")
        for key, values in parse_qs(parsed.query).items():
            print(f" {key}: {values}")

    post_text = request.get('postData', {}).get('text')
    if post_text:
        # Only show an ellipsis when the payload was actually truncated.
        suffix = "..." if len(post_text) > 200 else ""
        print(f"POST Data: {post_text[:200]}{suffix}")

    response_text = response.get('content', {}).get('text', '')
    if response_text:
        _report_response_body(response_text)


def analyze_har_file(har_file):
    """Analyze a HAR file to find product-related API calls.

    Prints a report (the first 20 API calls and a detailed breakdown of the
    first 5 product-related calls) and returns the collected data.

    Args:
        har_file: Path to the HAR (JSON) file to read.

    Returns:
        A tuple ``(api_calls, product_calls)``:
        ``api_calls`` is a list of dicts with the keys ``url``, ``method``,
        ``status``, ``is_pokemon`` and ``response_size``, one per request
        whose path looks like an API route or whose response is JSON;
        ``product_calls`` is the list of raw HAR entries among those whose
        URL mentions pokemon, product or search.
        On any error the problem is printed and ``([], [])`` is returned;
        this function does not raise.
    """
    print(f"Analyzing HAR file: {har_file}")

    try:
        # utf-8-sig also accepts files that start with a UTF-8 BOM, which
        # json.load would otherwise reject.
        with open(har_file, 'r', encoding='utf-8-sig') as f:
            har_data = json.load(f)

        entries = har_data.get('log', {}).get('entries', [])
        print(f"Found {len(entries)} network requests")
        print()

        api_calls = []
        product_calls = []

        for entry in entries:
            request = entry.get('request', {})
            response = entry.get('response', {})
            url = request.get('url', '')

            parsed_url = urlparse(url)
            path = parsed_url.path.lower()
            query = parsed_url.query.lower()

            is_api = any(keyword in path for keyword in _API_PATH_KEYWORDS)
            if not (is_api or _is_json_response(response)):
                continue

            contains_pokemon = 'pokemon' in query or 'pokemon' in path
            api_calls.append({
                'url': url,
                'method': request.get('method', ''),
                'status': response.get('status', 0),
                'is_pokemon': contains_pokemon,
                'response_size': _response_size(response),
            })

            if contains_pokemon or 'product' in path or 'search' in path:
                product_calls.append(entry)

        print(f"Found {len(api_calls)} potential API calls")
        print(f"Found {len(product_calls)} product-related calls")
        print()

        print("=== API CALLS ===")
        for call in api_calls[:20]:  # Show first 20
            pokemon_flag = "🎯" if call['is_pokemon'] else " "
            print(f"{pokemon_flag} {call['method']} {call['status']} - {call['url']}")
            if call['response_size'] > 1000:
                print(f" 📦 Response size: {call['response_size']} bytes")

        print()

        if product_calls:
            print("=== DETAILED PRODUCT CALL ANALYSIS ===")
            # Analyze the first 5 product calls only, to keep the report short.
            for number, entry in enumerate(product_calls[:5], start=1):
                _print_product_call(number, entry)

        return api_calls, product_calls

    except Exception as e:
        # Deliberate best-effort boundary: unreadable or malformed input is
        # reported and turned into empty results rather than an exception.
        print(f"Error analyzing HAR file: {e}")
        return [], []
|
||||
|
||||
if __name__ == "__main__":
    from pathlib import Path

    # HAR paths given on the command line take precedence; with no
    # arguments the original capture file is analyzed.
    har_files = sys.argv[1:] or ['www.dollargeneral.com_Archive [26-03-21 15-14-28].har']

    for har_file in har_files:
        # Check for the file up front: analyze_har_file reports its own
        # errors and returns empty lists, so a missing file would otherwise
        # produce a zero-count summary instead of this message.
        if not Path(har_file).is_file():
            print(f"HAR file not found: {har_file}")
            continue

        try:
            api_calls, product_calls = analyze_har_file(har_file)
            print("\n🎯 SUMMARY:")
            print(f" Total API calls: {len(api_calls)}")
            print(f" Product-related calls: {len(product_calls)}")

            if product_calls:
                print("\n💡 NEXT STEPS:")
                print(" 1. Test the identified API endpoints")
                print(" 2. Replicate the headers and parameters")
                print(" 3. Integrate successful calls into Pokemon Discovery")

        except FileNotFoundError:
            # Kept for the case where the file disappears after the check.
            print(f"HAR file not found: {har_file}")
        except Exception as e:
            # Top-level boundary: report and move on to the next file.
            print(f"Error processing {har_file}: {e}")
|
||||
Reference in New Issue
Block a user