Add Brave browser support with compatibility testing

- Configured Brave browser integration (/usr/bin/brave)
- Updated Selenium WebDriver to use the Brave binary
- Added proper Service-based WebDriver initialization
- Enhanced error handling and fallback mechanisms
- Created a comprehensive Brave compatibility test script

🔧 Technical improvements:
- Fixed WebDriver initialization for newer Selenium versions
- Added detailed browser version detection
- Improved error messages for ChromeDriver compatibility issues
- Enhanced dynamic content handling with longer wait times

📋 Known compatibility notes:
- Brave 146 and ChromeDriver 114 versions do not match (solvable by manually installing a ChromeDriver compatible with the installed Brave version)
- Core PDF generation functionality works independently
- Graceful fallback to requests-only mode when browser unavailable

This allows users of the Brave browser to use dynamic content scraping
while PDF catalog generation remains fully functional.
This commit is contained in:
2026-03-21 14:53:12 -07:00
parent c3691a474e
commit 94d193a5b0
4 changed files with 151 additions and 15 deletions

View File

@@ -25,7 +25,7 @@ try:
SELENIUM_AVAILABLE = True
except ImportError:
SELENIUM_AVAILABLE = False
print("Selenium not available, using requests only")
print("Selenium not available, using requests only (install selenium for Brave browser support)")
class PokemonTCGScraper:
def __init__(self):
@@ -58,7 +58,7 @@ class PokemonTCGScraper:
return None
def get_page_with_selenium(self, url):
"""Fallback to selenium for dynamic content"""
"""Fallback to selenium for dynamic content using Brave browser"""
if not SELENIUM_AVAILABLE:
return None
@@ -67,26 +67,59 @@ class PokemonTCGScraper:
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-gpu')
options.add_argument('--disable-web-security')
options.add_argument('--disable-features=VizDisplayCompositor')
options.add_argument(f'--user-agent={self.headers["User-Agent"]}')
# Use Brave browser
options.binary_location = '/usr/bin/brave'
try:
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
print("Starting Brave browser with Selenium...")
from selenium.webdriver.chrome.service import Service
# Try to get compatible ChromeDriver
try:
# Try with webdriver manager (auto-detects version)
service = Service(ChromeDriverManager().install())
except Exception as e:
print(f"ChromeDriver auto-install failed: {e}")
print("This usually means ChromeDriver version doesn't match Brave version.")
print("For best results, ensure ChromeDriver and Brave versions are compatible.")
print("You can manually install a compatible ChromeDriver or use a different browser.")
return None
driver = webdriver.Chrome(service=service, options=options)
print(f"Navigating to: {url}")
driver.get(url)
# Wait for content to load
WebDriverWait(driver, 10).until(
print("Waiting for page content to load...")
WebDriverWait(driver, 15).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
# Additional wait for dynamic content
time.sleep(3)
# Additional wait for dynamic content and JavaScript execution
print("Waiting for dynamic content...")
time.sleep(5)
# Try to find product-related elements
print("Looking for product elements...")
try:
# Check if we have product elements loaded
product_elements = driver.find_elements(By.CSS_SELECTOR, 'a[href*="/p/"], .product-item, .product-card')
print(f"Found {len(product_elements)} potential product elements")
except:
print("No specific product elements found, proceeding with full page content")
html = driver.page_source
print(f"Retrieved {len(html)} characters of HTML content")
driver.quit()
return html
except Exception as e:
print(f"Selenium failed for {url}: {e}")
print(f"Brave/Selenium failed for {url}: {e}")
if 'driver' in locals():
driver.quit()
return None
@@ -271,8 +304,23 @@ class PokemonTCGScraper:
print(f"Found {len(product_links)} potential product links")
if not product_links:
print("No product links found. The page structure may have changed.")
print("First 1000 chars of page:")
print("No product links found with requests. Trying Brave browser for dynamic content...")
# Try Selenium with Brave as fallback
selenium_html = self.get_page_with_selenium(self.search_url)
if selenium_html and len(selenium_html) > len(html):
print("Got enhanced content from Brave, re-extracting product links...")
html = selenium_html
product_links = self.extract_product_links(html)
print(f"Found {len(product_links)} product links with Brave browser")
if not product_links:
print("No product links found even with Brave browser.")
print("This could be due to:")
print("1. No Pokemon TCG products currently in stock")
print("2. Website structure changes")
print("3. Enhanced anti-bot protection")
print("4. Geographic restrictions")
print("\nFirst 1000 chars of final page content:")
print(html[:1000])
return []