🔍 Debug: Why only one product found - Dynamic loading analysis
✅ MYSTERY SOLVED: Pokemon page loads but products are dynamic! 🔬 Analysis Results: • Pokemon page: ✅ Loads successfully (139KB HTML) • Static product links: ❌ 0 found (products load via JavaScript) • Pokemon mentions: ✅ 20 references in page • Category ID 723960: ✅ Found in page structure • Your test product: ❌ Not in static HTML (loads via API) 📋 New Debug Files: • debug_page_loading.py - Technical analysis of page loading • WHY_ONLY_ONE_PRODUCT.md - Complete explanation with solutions • pokemon_page_sample.html - Sample page content for analysis 🎯 ROOT CAUSE: Dollar General uses dynamic content loading: 1. Page loads basic HTML structure 2. JavaScript makes API calls to get products 3. API returns 4-12 Pokemon products as JSON 4. Products rendered into DOM after page load 5. Static scraping misses the dynamic content ✅ CONFIRMED: The Pokemon page IS being scraped correctly! ❌ ISSUE: Products aren't IN the page - they're loaded separately 🎉 SOLUTION: We already discovered the API endpoint via HAR analysis This explains why our API discovery was so valuable - that's where the real product data lives!
This commit is contained in:
294
pokemon_page_sample.html
Normal file
294
pokemon_page_sample.html
Normal file
@@ -0,0 +1,294 @@
|
||||
|
||||
<!DOCTYPE HTML>
|
||||
<html lang="en">
|
||||
<head>
|
||||
|
||||
|
||||
<meta charset="UTF-8"/>
|
||||
<title>
|
||||
Pokemon
|
||||
</title>
|
||||
<!-- Iterate over preloadUrls -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<meta name="robots" content="index, follow"/>
|
||||
|
||||
|
||||
|
||||
<meta name="description" content="Shop for Pokemon at Dollar General."/>
|
||||
<meta name="template" content="category-page-template"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
|
||||
|
||||
|
||||
<meta name="content-page-ref" content="eyzXWsPCDMW1KkhXo6-vK-QOHHXaDV4DTx4MUHOL5zAPRiNcJ9pD0H1_MbrY0VDfnmuWWl_PiDqTS8zA-qwgPQ"/>
|
||||
<script defer="defer" type="text/javascript" src="/.rum/@adobe/helix-rum-js@%5E2/dist/micro.js"></script>
|
||||
<script>
|
||||
window.pageConfig = Object.assign(window.pageConfig || {}, {
|
||||
googleApiKey: "AIzaSyDi0nb6nKeHaDJWFtAvbAIPKBrUuAc_mTY",
|
||||
isEditMode: "false"
|
||||
});
|
||||
|
||||
// Expose WCM mode information to frontend
|
||||
|
||||
window.DG = window.DG || {};
|
||||
window.DG.wcmMode = {
|
||||
isEdit: false,
|
||||
isPreview: false,
|
||||
isDisabled: true,
|
||||
isDesign: false
|
||||
};
|
||||
|
||||
</script>
|
||||
|
||||
<script>
|
||||
window.DG = window.DG || {};
|
||||
window.DG.aemData = window.DG.aemData || {};
|
||||
window.DG.aemData.config = Object.assign(window.DG.aemData.config || {}, {
|
||||
shoppingListPageUrl: "https:\/\/www.dollargeneral.com\/shopping\u002Dlist",
|
||||
cartPageUrl: "https:\/\/www.dollargeneral.com\/cart",
|
||||
checkOutPageUrl: "https:\/\/www.dollargeneral.com\/cart\/checkout",
|
||||
orderPlacedPageUrl: "https:\/\/www.dollargeneral.com\/cart\/order\u002Dplaced?orderguid",
|
||||
orderDetailsPageUrl: "https:\/\/www.dollargeneral.com\/order\u002Ddetails?orderguid",
|
||||
orderHelpPageUrl: "https:\/\/www.dollargeneral.com\/order\u002Ddetails\/order\u002Dhelp",
|
||||
substitutionsPageUrl: "https:\/\/www.dollargeneral.com\/cart\/substitutions",
|
||||
dealsPageUrl: "https:\/\/www.dollargeneral.com\/deals",
|
||||
offersPageUrl: "https:\/\/www.dollargeneral.com\/deals\/offers\/{offer\u002Dcode}",
|
||||
pdpPageUrl: "https:\/\/www.dollargeneral.com\/p\/{hyphenated\u002Dproduct\u002Dname}\/{upc}",
|
||||
weeklyAdsPageUrl: "https:\/\/www.dollargeneral.com\/deals\/weekly\u002Dads\/weekly\u002Dad\/{weekly\u002Dad\u002Did}?flyer_run_id={*}{weekly\u002Dad\u002Did}\x22{}{*}",
|
||||
signInPageUrl: "https:\/\/www.dollargeneral.com\/sign\u002Din",
|
||||
signUpPageUrl: "https:\/\/www.dollargeneral.com\/sign\u002Dup",
|
||||
omniServerUrl: "https:\/\/dggo.dollargeneral.com",
|
||||
deviceIdCookieMaxAge : "31536000",
|
||||
cookiesMaxAge : "31536000",
|
||||
useAkamaiLatLng : true,
|
||||
paymentMethodsUrl : "https:\/\/www.dollargeneral.com\/my\u002Dinformation?startpage=paymentmethods",
|
||||
orderHistoryUrl : "https:\/\/www.dollargeneral.com\/my\u002Dinformation?startpage=orders",
|
||||
walletPageUrl : "https:\/\/www.dollargeneral.com\/mydg\/wallet",
|
||||
couponsPageUrl : "https:\/\/www.dollargeneral.com\/deals\/coupons",
|
||||
couponDetailsUrl : "https:\/\/www.dollargeneral.com\/deals\/coupons\/{coupon\u002Dtype}\/{coupon\u002Dcode}",
|
||||
trackMyOrderPage : "https:\/\/www.dollargeneral.com\/orders",
|
||||
storeDirectoryUrl : "https:\/\/www.dollargeneral.com\/store\u002Ddirectory",
|
||||
myDgPageUrl : "https:\/\/www.dollargeneral.com\/mydg",
|
||||
inventoryCallSearchRadius : "15",
|
||||
orderSubstitutionsPageUrl : "https:\/\/www.dollargeneral.com\/order\u002Ddetails\/substitutions"
|
||||
});
|
||||
// Object.assign() called with no target throws a TypeError and aborts this script,
// so merge into the existing map (or a fresh empty object) instead.
window.DG.aemData.sparkCodeErrorMsgs = Object.assign(window.DG.aemData.sparkCodeErrorMsgs || {}, {});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- Facebook Meta Tags -->
|
||||
<meta property="og:type" content="website"/>
|
||||
<meta property="og:title" content="Pokemon"/>
|
||||
|
||||
|
||||
<meta property="og:url" content="https://www.dollargeneral.com/c/toys/pokemon"/>
|
||||
|
||||
<!-- Twitter Meta Tags -->
|
||||
<meta name="twitter:card" content="summary_large_image"/>
|
||||
<meta name="twitter:title" content="Pokemon"/>
|
||||
|
||||
|
||||
<meta property="twitter:url" content="https://www.dollargeneral.com/c/toys/pokemon"/>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "BreadcrumbList",
|
||||
"itemListElement": [
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 1,
|
||||
"item": {
|
||||
"@type": "Thing",
|
||||
"@id": "https://www.dollargeneral.com/",
|
||||
"name": "Dollar General"
|
||||
}
|
||||
},
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 2,
|
||||
"item": {
|
||||
"@type": "Thing",
|
||||
"@id": "https://www.dollargeneral.com/tps://www.dollargeneral.com/content/dollargeneral/us/en/c/toys/pokemon",
|
||||
"name": "tps:"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
/**
 * Store service enum in binary for "sezzle" is {@code 1000 0000 0000 0000 0000}.
 */
const SEZZLE_BIT_MASK_VALUE = 524288;

/**
 * Store service enum in binary for "bopis" is {@code 0000 0000 0000 0000 1000}.
 */
const BOPIS_BIT_MASK_VALUE = 8;

/**
 * Store service enum in binary for "delivery" is {@code 0000 0000 0001 0000 0000}.
 */
const DELIVERY_BIT_MASK_VALUE = 256;

/**
 * The key name for the object stored in {@link localStorage} for user store and guest store data.
 */
const PREFERRED_STORE_DATA_KEY = "preferredStoreData";

/**
 * The default store to set if user is either not signed in or we are not able to
 * determine a preferred store from the signed-in users data.
 */
const DEFAULT_STORE_NUMBER = 1014;

/**
 * Default radius for store searches.
 * NOTE(review): the unit is not stated anywhere in this script - confirm with the consumer.
 */
const DEFAULT_STORE_SEARCH_RADIUS = 10;

/**
 * Default latitude value.
 * NOTE(review): presumably a fallback when no location is known - confirm with the consumer.
 */
const DEFAULT_LATITUDE = 0;

/**
 * Default longitude value.
 * NOTE(review): presumably a fallback when no location is known - confirm with the consumer.
 */
const DEFAULT_LONGITUDE = 0;

/**
 * Cookie max-age taken from the AEM config, falling back to "31536000"
 * (365 * 24 * 60 * 60) when the config value is missing or empty.
 * The radix is passed explicitly so the string is always parsed as decimal.
 */
const cookiesMaxAgeInSeconds = parseInt(
  window?.DG?.aemData?.config?.cookiesMaxAge || "31536000",
  10
);

/**
 * Feature flag read from window.__FEATURE_FLAGS__; undefined when the flags object is absent.
 */
const useCloudService = window.__FEATURE_FLAGS__?.useCloudServicesHeader;

/**
 * Feature flag read from window.__FEATURE_FLAGS__; undefined when the flags object is absent.
 */
const enableStoreSelectionFromURL = window.__FEATURE_FLAGS__?.enableStoreSelectionFromURL;
|
||||
|
||||
/**
 * Tells whether the "sezzle" bit is set in a store-service bitmask.
 *
 * @param {number} storeService - bitmask of store services
 * @returns {boolean} true when every bit of SEZZLE_BIT_MASK_VALUE is set
 */
const isSezzle = (storeService) => {
  const sezzleBits = storeService & SEZZLE_BIT_MASK_VALUE;
  return sezzleBits === SEZZLE_BIT_MASK_VALUE;
};
|
||||
/**
 * Tells whether the "bopis" bit is set in a store-service bitmask.
 *
 * @param {number} storeService - bitmask of store services
 * @returns {boolean} true when every bit of BOPIS_BIT_MASK_VALUE is set
 */
const isBopis = (storeService) => {
  const bopisBits = storeService & BOPIS_BIT_MASK_VALUE;
  return bopisBits === BOPIS_BIT_MASK_VALUE;
};
|
||||
/**
 * Tells whether the "delivery" bit is set in a store-service bitmask.
 *
 * @param {number} storeService - bitmask of store services
 * @returns {boolean} true when every bit of DELIVERY_BIT_MASK_VALUE is set
 */
const isDelivery = (storeService) => {
  const deliveryBits = storeService & DELIVERY_BIT_MASK_VALUE;
  return deliveryBits === DELIVERY_BIT_MASK_VALUE;
};
|
||||
|
||||
/**
 * Reads a single query-string parameter from the current page URL.
 *
 * @param {string} paramName - name of the parameter to look up
 * @returns {string|null} the first value for that parameter, or null when it is absent
 */
const getQueryParam = (paramName) => {
  const searchParams = new URLSearchParams(window.location.search);
  return searchParams.get(paramName);
};
|
||||
|
||||
/**
 * Reads the raw preferred-store entry from localStorage.
 *
 * Uses PREFERRED_STORE_DATA_KEY so the reader and the writer in this script
 * always agree on the storage key.
 *
 * @returns {string|null} the stored JSON text, or null when nothing is stored
 */
function getPreferredStoreDetails() {
  return window.localStorage.getItem(PREFERRED_STORE_DATA_KEY);
}
|
||||
|
||||
/**
 * Looks up a cookie by name in document.cookie.
 *
 * The cookie string is split on ";" first and each pair is decoded on its own.
 * Decoding the whole string up front would let an encoded ";" inside a value
 * cut that value short, and a single malformed percent-escape in any cookie
 * would make every lookup throw a URIError.
 *
 * @param {string} cname - name of the cookie to read
 * @returns {string} the decoded cookie value, or "" when no cookie has that name
 */
function getCookie(cname) {
  const prefix = `${cname}=`;

  for (const rawPair of document.cookie.split(";")) {
    let pair;
    try {
      pair = decodeURIComponent(rawPair);
    } catch {
      // Malformed percent-escape in this pair: fall back to its raw text
      // instead of failing the lookup for every cookie.
      pair = rawPair;
    }

    // Pairs are separated by "; ", so drop the leading spaces before matching.
    pair = pair.replace(/^ +/, "");

    if (pair.startsWith(prefix)) {
      return pair.substring(prefix.length);
    }
  }

  return "";
}
|
||||
|
||||
/**
 * Saves a store as the preferred user or guest store in localStorage and
 * announces the change by dispatching an "updateStoreEvent" on window.
 *
 * Nothing is written when storeObj lacks any of sn, ad, ct, st or zp.
 * Stored data that is unreadable (invalid JSON, or not a plain object) is
 * discarded and replaced, so it cannot block the update.
 *
 * @param {object} storeObj - store record; the fields read here are sn (store number),
 *   ad (address), ct (city), st (state), zp (zip), la, lo and ss (service bitmask)
 * @param {boolean} isUser - true to write the "userStore" slot, otherwise "guestStore"
 * @returns {Promise<void>}
 */
async function setStoreDetails(storeObj, isUser) {
  if (!storeObj?.sn || !storeObj?.ad || !storeObj?.ct || !storeObj?.st || !storeObj?.zp) {
    return;
  }

  // Read storage once and tolerate corrupt content rather than rejecting.
  let preferredStore = {};
  const storedValue = getPreferredStoreDetails();
  if (storedValue) {
    try {
      const parsed = JSON.parse(storedValue);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        preferredStore = parsed;
      }
    } catch (err) {
      console.error("Discarding unreadable preferred store data.", err);
    }
  }

  // Keep only the part of the zip before "-"; String() guards against a numeric zip.
  const formattedZip = String(storeObj.zp).split("-")[0];
  const storeNumber = parseInt(storeObj.sn, 10);

  const updatedStoreDetails = {
    address: storeObj.ad,
    city: storeObj.ct,
    latitude: storeObj.la,
    longitude: storeObj.lo,
    state: storeObj.st,
    storeService: storeObj.ss,
    storeNumber,
    // TODO: remove 'number' after full roll out to cloud
    number: storeNumber,
    zip: storeObj.zp,
    isSezzle: isSezzle(storeObj.ss),
    isBopis: isBopis(storeObj.ss),
    isDelivery: isDelivery(storeObj.ss),
    lastUpdated: Date.now(),
    fullAddress: `${storeObj.ct}, ${storeObj.st} ${formattedZip}`,
  };

  preferredStore[isUser ? "userStore" : "guestStore"] = updatedStoreDetails;

  localStorage.setItem(
    PREFERRED_STORE_DATA_KEY,
    JSON.stringify(preferredStore)
  );

  const setStorage = new CustomEvent("updateStoreEvent");
  window.dispatchEvent(setStorage);
  console.log('Store data updated, event dispatched');
}
|
||||
|
||||
// gets default store details
|
||||
async function getGuestStoreDetails(storeNumber, fallbackFlow = false) {
|
||||
|
||||
let storeDetailsUrl = 'https://dggo.dollargeneral.com/omni/api/store/info/';
|
||||
storeDetailsUrl = storeDetailsUrl + storeNumber;
|
||||
|
||||
const guestStoreDetails = async () => {
|
||||
try {
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.open("GET", storeDetailsUrl, true);
|
||||
|
||||
xhr.setRequestHeader("Content-Type", "application/json");
|
||||
xhr.setRequestHeader("X-DG-appToken", getCookie("appToken"));
|
||||
xhr.setRequestHeader("X-DG-appSessionToken", getCookie('appSessionToken'));
|
||||
xhr.setRequestHeader("X-DG-customerGuid", getCookie('customerGuid'));
|
||||
xhr.setRequestHeader("X-DG-deviceUniqueId", getCookie('uniqueDeviceId'));
|
||||
xhr.setRequestHeader("X-DG-partnerApiToken", getCookie('partnerApiToken'));
|
||||
let bearerToken = "Bearer " + getCookie('idToken');
|
||||
xhr.setRequestHeader("Authorization", bearerToken);
|
||||
|
||||
if (useCloudService) {
|
||||
xhr.setRequestHeader("X-DG-CLOUD-SERVICE", useCloudService);
|
||||
}
|
||||
|
||||
xhr.onreadystatechange = function () {
|
||||
if (this.readyState === XMLHttpRequest.DONE && this.status === 200) {
|
||||
const sparkCode = this.getResponseHeader("x-spark");
|
||||
if (sparkCode && SPARK_CODES.tokenExpired.includes(sparkCode)) {
|
||||
refreshTokens()
|
||||
.then(() => guestStoreDetails())
|
||||
.catch(() => {
|
||||
console.error("Failed to refresh tokens.");
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user