62 lines
1.9 KiB
Python
62 lines
1.9 KiB
Python
"""
|
|
Example: Scraping dynamic content with Selenium.
|
|
"""
|
|
from scrapers.selenium_scraper import SeleniumScraper
|
|
import time
|
|
|
|
|
|
def scrape_dynamic_content():
    """
    Example: Scrape JavaScript-rendered content

    Opens http://quotes.toscrape.com/js/ through
    ``SeleniumScraper(headless=True)``, passing ``wait_for=".quote"``, and
    prints the number of ``.quote`` elements found followed by the text and
    author of the first three. When the scrape result is not flagged as
    successful, the reported error is printed instead.
    """
    with SeleniumScraper(headless=True) as scraper:
        # Example with a site that loads content dynamically
        target_url = "http://quotes.toscrape.com/js/"
        result = scraper.scrape(target_url, wait_for=".quote")

        # Failure path first, so the happy path below stays un-nested.
        if not result["success"]:
            print(f"Scraping failed: {result.get('error')}")
            return

        quotes = result["soup"].select(".quote")
        print(f"Scraped {len(quotes)} quotes from JavaScript-rendered page")

        # Extract quote details (first three quotes only)
        for quote in quotes[:3]:
            text_node = quote.select_one(".text")
            text = text_node.get_text(strip=True)
            author_node = quote.select_one(".author")
            author = author_node.get_text(strip=True)
            print(f"\n{text}\n - {author}")
|
|
|
|
|
|
def interact_with_page():
    """
    Example: Interact with page elements (clicking, scrolling, etc.)

    Opens http://quotes.toscrape.com/ through
    ``SeleniumScraper(headless=False)``, scrolls to the bottom of the page,
    then tries to click the ``.next > a`` link and prints the URL the driver
    ends up on.

    If the initial page load is not flagged as successful, the reported
    error is printed and the scroll/click steps are skipped. Any exception
    raised while clicking is printed rather than propagated, so the example
    also finishes cleanly on a page without a "Next" link.
    """
    with SeleniumScraper(headless=False) as scraper:
        result = scraper.scrape("http://quotes.toscrape.com/")

        # Do not scroll or click on a page that failed to load; report the
        # error from the scrape result instead of silently carrying on.
        if not result["success"]:
            print(f"Scraping failed: {result.get('error')}")
            return

        # Scroll down
        scraper.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Fixed pause after scrolling (presumably to let the page settle).
        time.sleep(1)

        # Click "Next" button if exists
        try:
            scraper.click_element(".next > a")
            # Fixed pause after the click (presumably to let navigation finish).
            time.sleep(2)

            print(f"Navigated to: {scraper.driver.current_url}")
        except Exception as e:
            # Deliberately broad: this is a best-effort demo step.
            print(f"Could not click next: {e}")
|
|
|
|
|
|
def main():
    """Print a banner for each example and run the dynamic-content one.

    The interaction example stays commented out because it opens a visible
    (non-headless) browser window.
    """
    print("=== Selenium Dynamic Content Example ===\n")
    scrape_dynamic_content()

    print("\n=== Selenium Interaction Example ===\n")
    # Uncomment to see browser interaction (non-headless)
    # interact_with_page()


if __name__ == "__main__":
    main()
|
|
|