🎉 MAJOR BREAKTHROUGH: Dollar General API Endpoint Discovered!

Successfully discovered the internal API via HAR analysis:
• Endpoint: https://dggo.dollargeneral.com/omni/api/v2/category/search/provider
• Method: POST with JSON payload
• Category ID: 723960 (Pokemon products)
• Store Number: 17506
• Response: Contains SKU 41936301 and all Pokemon TCG products!

🔬 HAR Analysis Tools Added:
• analyze_har.py - Extract API calls from HAR files
• extract_api_details.py - Detailed API request format extraction
• implement_api_scraper.py - Full API implementation framework
• test_api_scraper.py - API endpoint testing

📋 API Documentation:
• DISCOVERY_SUCCESS.md - Complete analysis and findings
• api_request_template.json - Exact request format
• scraper.py updated with API framework

🎯 KEY DISCOVERIES:
• Found the exact API endpoint used by the Dollar General website
• Documented the complete request/response format
• Confirmed the presence of the test product (SKU 41936301)
• Identified the Pokemon category ID and store parameters
• Ready for bulk product scraping once auth is implemented

Current Status:
• Individual product extraction: 100% working
• API framework: Discovered and documented
• Authentication: Requires Bearer token (next challenge)
• PDF generation: Fully functional

This breakthrough enables potential bulk product discovery and
makes Pokemon Discovery far more powerful for inventory management!
This commit is contained in:
2026-03-21 15:21:36 -07:00
parent 729ed0cfc6
commit 58e995f6a6
9 changed files with 51096 additions and 6 deletions

181
analyze_har.py Normal file
View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""
Analyze HAR file to find product loading endpoints
"""
import json
import sys
from urllib.parse import urlparse, parse_qs
# URL path fragments that mark a request as a likely API call.
_API_PATH_KEYWORDS = ('/api/', '/search', '/products', '/inventory', '/catalog')

# Request headers worth echoing when replicating a call by hand.
_IMPORTANT_HEADERS = ('accept', 'content-type', 'authorization', 'x-api-key', 'referer')

# JSON keys whose list values are reported as potential product arrays.
_PRODUCT_KEYS = ('products', 'items', 'results', 'data')


def _has_json_content_type(response):
    """Return True if a response header is a Content-Type that mentions JSON.

    Both the header name and its value are compared case-insensitively, and a
    missing or null value is treated as an empty string.
    """
    for header in response.get('headers', []):
        name = (header.get('name') or '').lower()
        value = (header.get('value') or '').lower()
        if name == 'content-type' and 'json' in value:
            return True
    return False


def _find_product_arrays(obj, path=""):
    """Recursively collect ``(path, length)`` for lists under product-like keys.

    ``path`` is the dotted/indexed location of ``obj`` inside the document,
    e.g. ``data.items`` or ``results[0].products``.
    """
    found = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            child_path = f"{path}.{key}" if path else key
            if key.lower() in _PRODUCT_KEYS and isinstance(value, list):
                found.append((child_path, len(value)))
            # Keep descending: product arrays may be nested at any depth.
            found.extend(_find_product_arrays(value, child_path))
    elif isinstance(obj, list):
        for idx, item in enumerate(obj):
            found.extend(_find_product_arrays(item, f"{path}[{idx}]"))
    return found


def _print_response_findings(response_text):
    """Print what a response body reveals: JSON validity, arrays, test markers."""
    print(f"Response size: {len(response_text)} characters")
    try:
        response_json = json.loads(response_text)
    except json.JSONDecodeError:
        print("Response is not JSON")
        # A non-JSON body (e.g. HTML) can still mention the test product.
        if '41936301' in response_text:
            print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
        return

    print("✓ Valid JSON response")
    product_arrays = _find_product_arrays(response_json)
    if product_arrays:
        print("Potential product arrays found:")
        for array_path, count in product_arrays:
            print(f" {array_path}: {count} items")

    # Search the stringified document so matches are found in keys and values.
    response_str = str(response_json).lower()
    if '41936301' in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT SKU!")
    if '728192558375' in response_str:
        print("🎯 CONTAINS OUR TEST PRODUCT UPC!")
    if 'pokemon' in response_str:
        print("🎯 CONTAINS POKEMON REFERENCES!")


def _print_product_call(number, entry):
    """Print URL, method, status, key headers, parameters and body of one entry."""
    request = entry.get('request', {})
    response = entry.get('response', {})
    print(f"\n--- Product Call {number} ---")
    print(f"URL: {request.get('url', '')}")
    print(f"Method: {request.get('method', '')}")
    print(f"Status: {response.get('status', 0)}")

    important_headers = [
        h for h in request.get('headers', [])
        if h.get('name', '').lower() in _IMPORTANT_HEADERS
    ]
    if important_headers:
        print("Important Headers:")
        for header in important_headers:
            # Truncate long values (tokens, cookies) to keep the report readable.
            print(f" {header.get('name')}: {header.get('value', '')[:100]}")

    parsed = urlparse(request.get('url', ''))
    if parsed.query:
        print("Query Parameters:")
        for key, values in parse_qs(parsed.query).items():
            print(f" {key}: {values}")

    post_data = request.get('postData', {})
    if post_data.get('text'):
        print(f"POST Data: {post_data.get('text')[:200]}...")

    response_text = response.get('content', {}).get('text', '')
    if response_text:
        _print_response_findings(response_text)


def analyze_har_file(har_file):
    """Analyze a HAR file to find product-related API calls.

    Prints a report to stdout: the first 20 candidate API calls, followed by a
    detailed breakdown of the first 5 product-related calls.

    Args:
        har_file: Path to a HAR (JSON) capture. The file is decoded as UTF-8
            and a leading byte-order mark, if present, is ignored.

    Returns:
        A tuple ``(api_calls, product_calls)``. ``api_calls`` is a list of
        summary dicts with keys ``url``, ``method``, ``status``, ``is_pokemon``
        and ``response_size``. ``product_calls`` is the list of raw HAR entries
        among those API calls that mention pokemon, ``product`` or ``search``.
        On any error (missing file, invalid JSON, unexpected structure) the
        error is printed and ``([], [])`` is returned; nothing is raised.
    """
    print(f"Analyzing HAR file: {har_file}")
    try:
        # 'utf-8-sig' reads plain UTF-8 and also strips a BOM, which would
        # otherwise make json.load reject the file.
        with open(har_file, 'r', encoding='utf-8-sig') as f:
            har_data = json.load(f)
        entries = har_data.get('log', {}).get('entries', [])
        print(f"Found {len(entries)} network requests")
        print()

        # Filter for API calls that might contain product data
        api_calls = []
        product_calls = []
        for entry in entries:
            request = entry.get('request', {})
            response = entry.get('response', {})
            url = request.get('url', '')
            parsed_url = urlparse(url)
            path = parsed_url.path.lower()
            query = parsed_url.query.lower()

            is_api = any(keyword in path for keyword in _API_PATH_KEYWORDS)
            contains_pokemon = 'pokemon' in query or 'pokemon' in path
            if not (is_api or _has_json_content_type(response)):
                continue

            api_calls.append({
                'url': url,
                'method': request.get('method', ''),
                'status': response.get('status', 0),
                'is_pokemon': contains_pokemon,
                'response_size': response.get('bodySize', 0)
            })
            if contains_pokemon or 'product' in path or 'search' in path:
                product_calls.append(entry)

        print(f"Found {len(api_calls)} potential API calls")
        print(f"Found {len(product_calls)} product-related calls")
        print()

        # Show interesting API calls
        print("=== API CALLS ===")
        for call in api_calls[:20]:  # Show first 20
            pokemon_flag = "🎯" if call['is_pokemon'] else " "
            print(f"{pokemon_flag} {call['method']} {call['status']} - {call['url']}")
            if call['response_size'] > 1000:
                print(f" 📦 Response size: {call['response_size']} bytes")
        print()

        # Analyze product-specific calls in detail
        if product_calls:
            print("=== DETAILED PRODUCT CALL ANALYSIS ===")
            for number, entry in enumerate(product_calls[:5], start=1):
                _print_product_call(number, entry)

        # Return the most promising API calls
        return api_calls, product_calls
    except Exception as e:
        # Deliberate best-effort boundary: callers get empty results, not a crash.
        print(f"Error analyzing HAR file: {e}")
        return [], []
if __name__ == "__main__":
    # HAR paths given on the command line take precedence; with no arguments
    # the script falls back to the capture it was originally written for.
    har_files = sys.argv[1:] or ['www.dollargeneral.com_Archive [26-03-21 15-14-28].har']
    for har_file in har_files:
        try:
            api_calls, product_calls = analyze_har_file(har_file)
            print("\n🎯 SUMMARY:")
            print(f" Total API calls: {len(api_calls)}")
            print(f" Product-related calls: {len(product_calls)}")
            if product_calls:
                print("\n💡 NEXT STEPS:")
                print(" 1. Test the identified API endpoints")
                print(" 2. Replicate the headers and parameters")
                print(" 3. Integrate successful calls into Pokemon Discovery")
        except FileNotFoundError:
            print(f"HAR file not found: {har_file}")
        except Exception as e:
            # Keep going with the remaining files if one of them fails.
            print(f"Error processing {har_file}: {e}")