🎉 MAJOR BREAKTHROUGH: Dollar General API Endpoint Discovered!
✅ Successfully discovered internal API via HAR analysis:
• Endpoint: https://dggo.dollargeneral.com/omni/api/v2/category/search/provider
• Method: POST with JSON payload
• Category ID: 723960 (Pokemon products)
• Store Number: 17506
• Response: Contains SKU 41936301 and all Pokemon TCG products!

🔬 HAR Analysis Tools Added:
• analyze_har.py - Extract API calls from HAR files
• extract_api_details.py - Detailed API request format extraction
• implement_api_scraper.py - Full API implementation framework
• test_api_scraper.py - API endpoint testing

📋 API Documentation:
• DISCOVERY_SUCCESS.md - Complete analysis and findings
• api_request_template.json - Exact request format
• scraper.py updated with API framework

🎯 KEY DISCOVERIES:
✅ Found exact API endpoint used by Dollar General website
✅ Documented complete request/response format
✅ Confirmed presence of test product (SKU 41936301)
✅ Identified Pokemon category ID and store parameters
✅ Ready for bulk product scraping once auth is implemented

⚡ Current Status:
• Individual product extraction: 100% working
• API framework: Discovered and documented
• Authentication: Requires Bearer token (next challenge)
• PDF generation: Fully functional

This breakthrough enables potential bulk product discovery and makes Pokemon Discovery far more powerful for inventory management!
This commit is contained in:
135
extract_api_details.py
Normal file
135
extract_api_details.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract exact API request details from HAR file
|
||||
"""
|
||||
|
||||
import json
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
def _pick_request_headers(raw_headers):
    """Return the request headers worth replaying, keyed by their original name."""
    wanted = {'authorization', 'content-type', 'accept', 'referer', 'user-agent'}
    picked = {}
    for header in raw_headers or []:
        name = header.get('name')
        # A header record without a name would crash on `.lower()`; skip it.
        if name and name.lower() in wanted:
            picked[name] = header.get('value')
    return picked


def _entry_response_text(entry):
    """Return the response body text of a HAR entry, or '' when it is missing or null."""
    content = (entry.get('response') or {}).get('content') or {}
    return content.get('text') or ''


def _collect_pokemon_products(items):
    """Return a summary dict for every item whose title mentions Pokemon/Pokémon."""
    products = []
    for item in items:
        # `or` guards cover keys that are present but null in the response.
        title = (item.get('Title') or '').lower()
        if 'pokemon' in title or 'pokémon' in title:
            inventory = item.get('Inventory') or {}
            products.append({
                'title': item.get('Title'),
                'sku': item.get('ItemNbr'),
                'upc': item.get('UPC'),
                'price': (item.get('Price') or {}).get('Amount'),
                'url': item.get('ProductUrl'),
                'in_stock': inventory.get('InStock'),
                'available_online': inventory.get('AvailableOnline'),
            })
    return products


def extract_api_request_details(
    har_file: str = 'www.dollargeneral.com_Archive [26-03-21 15-14-28].har',
    template_path: str = 'api_request_template.json',
) -> list:
    """Extract the exact API request format from a HAR capture.

    Scans the HAR entries for successful (HTTP 200) POST calls to the category
    search endpoint whose response mentions SKU 41936301 and "pokemon", prints
    a report for each one, and saves the first matching call as a reusable
    request template.

    Args:
        har_file: Path of the HAR file to read. Defaults to the capture this
            script was originally written against.
        template_path: Where the request template for the first matching call
            is written.

    Returns:
        The list of matching HAR entries (possibly empty).

    Raises:
        OSError: If ``har_file`` cannot be opened.
        json.JSONDecodeError: If ``har_file`` is not valid JSON.

    Note:
        The saved template contains the captured ``Authorization`` header
        verbatim, so the template file should be treated as a secret.
    """
    with open(har_file, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    entries = har_data.get('log', {}).get('entries', [])

    # Find the API calls that contain our product
    api_endpoint = "https://dggo.dollargeneral.com/omni/api/v2/category/search/provider"

    successful_calls = []
    for entry in entries:
        request = entry.get('request', {})
        response = entry.get('response', {})
        if (request.get('url') != api_endpoint
                or request.get('method') != 'POST'
                or response.get('status') != 200):
            continue

        # Check if response contains our product
        response_text = _entry_response_text(entry)
        if '41936301' in response_text and 'pokemon' in response_text.lower():
            successful_calls.append(entry)

    print(f"Found {len(successful_calls)} successful API calls with Pokemon products")
    print()

    for index, entry in enumerate(successful_calls):
        request = entry.get('request', {})

        print(f"=== API Call {index + 1} ===")
        print(f"URL: {request.get('url')}")
        print(f"Method: {request.get('method')}")

        headers = _pick_request_headers(request.get('headers'))
        print("Headers:")
        for name, value in headers.items():
            if name.lower() == 'authorization':
                # Only a prefix is echoed so the full token does not land in logs.
                print(f" {name}: {(value or '')[:50]}... (Bearer token)")
            else:
                print(f" {name}: {value}")

        # Extract POST data. `post_json` is reset for every call so the
        # template below never sees an unbound or stale value.
        post_json = None
        post_text = (request.get('postData') or {}).get('text')
        if post_text:
            try:
                post_json = json.loads(post_text)
            except ValueError:
                # Not JSON: show the body as-is and keep the raw text for the template.
                print(f"POST Data (raw): {post_text}")
                post_json = post_text
            else:
                print("POST Data:")
                print(json.dumps(post_json, indent=2))

        # Analyze response
        response_text = _entry_response_text(entry)
        if response_text:
            try:
                response_json = json.loads(response_text)
                print(f"Response size: {len(response_text)} characters")

                # Extract product information
                items = (response_json.get('ItemList') or {}).get('Items') or []
                print(f"Products found: {len(items)}")

                pokemon_products = _collect_pokemon_products(items)
            except (ValueError, AttributeError, TypeError) as e:
                # Best-effort report: a malformed response must not abort the other calls.
                print(f"Error parsing response: {e}")
            else:
                if pokemon_products:
                    print(f"\nPokemon products in this response: {len(pokemon_products)}")
                    for prod in pokemon_products:
                        print(f" • {prod['title']}")
                        print(f" SKU: {prod['sku']}, UPC: {prod['upc']}")
                        print(f" Price: ${prod['price']}, In Stock: {prod['in_stock']}")
                        print(f" URL: {prod['url']}")

                if index == 0:  # Save the working request format
                    template = {
                        'endpoint': api_endpoint,
                        'method': 'POST',
                        'headers': headers,
                        'post_data': post_json,
                        'example_response': {
                            'total_items': len(items),
                            'pokemon_items': len(pokemon_products),
                            'sample_pokemon_product': pokemon_products[0] if pokemon_products else None,
                        },
                    }
                    try:
                        with open(template_path, 'w', encoding='utf-8') as f:
                            json.dump(template, f, indent=2)
                    except OSError as e:
                        print(f"Error saving API template: {e}")
                    else:
                        print(f"\n✅ Saved working API template to: {template_path}")

        print("\n" + "="*60 + "\n")

    return successful_calls
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the extraction, then print a short summary.
    working_calls = extract_api_request_details()
    call_count = len(working_calls)

    summary_lines = (
        "🎯 SUMMARY:",
        f" Successfully extracted {call_count} working API calls",
        " Next step: Implement this API call in Pokemon Discovery scraper",
    )
    for line in summary_lines:
        print(line)
|
||||
Reference in New Issue
Block a user