66 lines
1.9 KiB
Python
66 lines
1.9 KiB
Python
"""
Example: Basic web scraping with requests and BeautifulSoup.
"""
|
|
from scrapers.basic_scraper import BasicScraper
|
|
import json
|
|
|
|
|
|
def scrape_quotes():
    """
    Example: Scrape quotes from quotes.toscrape.com

    Fetches the page through ``BasicScraper``, builds one dict per
    ``.quote`` element with the keys ``"text"``, ``"author"`` and ``"tags"``,
    prints how many were found plus a JSON dump of the first three, and
    returns the full list.

    Returns:
        A list of quote dicts on success, or an empty list when the scrape
        result reports failure (the reported error is printed).
    """
    with BasicScraper() as scraper:
        page = scraper.scrape("http://quotes.toscrape.com/")

        # Failure path first: report the error and hand back an empty list
        if not page["success"]:
            print(f"Scraping failed: {page.get('error')}")
            return []

        # Extract all quotes: one record per ".quote" element
        quotes = [
            {
                "text": block.select_one(".text").get_text(strip=True),
                "author": block.select_one(".author").get_text(strip=True),
                "tags": [
                    tag_elem.get_text(strip=True)
                    for tag_elem in block.select(".tag")
                ],
            }
            for block in page["soup"].select(".quote")
        ]

        print(f"Scraped {len(quotes)} quotes")
        preview = quotes[:3]  # Print first 3 quotes
        print(json.dumps(preview, indent=2))

        return quotes
|
|
|
|
|
|
def scrape_with_links():
    """
    Example: Extract all links from a page

    Fetches http://quotes.toscrape.com/ through ``BasicScraper``, passes the
    parsed page to ``scraper.extract_links`` with the same URL as
    ``base_url``, then prints the number of links found followed by the
    first ten.

    Returns:
        Whatever ``scraper.extract_links`` returned on success (it is used
        here as a sized, sliceable sequence), or an empty list when the
        scrape result reports failure.
    """
    # Single source of truth for the page URL and the link base URL
    url = "http://quotes.toscrape.com/"

    with BasicScraper() as scraper:
        result = scraper.scrape(url)

        if not result["success"]:
            # Report the failure and return an empty list, the same way
            # scrape_quotes does, so callers always get a sequence back
            # instead of an implicit None.
            print(f"Scraping failed: {result.get('error')}")
            return []

        links = scraper.extract_links(result["soup"], base_url=url)

        print(f"Found {len(links)} links")
        for link in links[:10]:  # Print first 10 links
            print(f" - {link}")

        return links
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every example in order, printing its banner before invoking it
    examples = (
        ("=== Basic Scraping Example ===\n", scrape_quotes),
        ("\n=== Link Extraction Example ===\n", scrape_with_links),
    )
    for banner, run_example in examples:
        print(banner)
        run_example()
|
|
|