# Python source file: 106 lines, 3.1 KiB
"""
Example: Advanced scraping with Jina, Firecrawl, AgentQL, and Multion.
"""
|
|
from scrapers.jina_scraper import JinaScraper
|
|
from scrapers.firecrawl_scraper import FirecrawlScraper
|
|
from scrapers.agentql_scraper import AgentQLScraper
|
|
from scrapers.multion_scraper import MultionScraper
|
|
|
|
|
|
def jina_example():
    """Demonstrate AI-driven text extraction with the Jina scraper.

    Requests https://example.com in markdown format and prints either the
    first 500 characters of the returned content or the reported error.
    """
    print("=== Jina AI Example ===\n")

    with JinaScraper() as jina:
        outcome = jina.scrape("https://example.com", return_format="markdown")

        # Guard clause: report the failure and leave early.
        if not outcome["success"]:
            print(f"Error: {outcome.get('error')}")
            return

        print("Extracted content (first 500 chars):")
        preview = outcome["content"][:500]
        print(preview)
|
|
|
|
|
|
def firecrawl_example():
    """Demonstrate single-page scraping and multi-page crawling with Firecrawl.

    Scrapes https://example.com and prints the length of the returned
    content, then crawls the same site (max depth 2, at most 5 pages) and
    prints the number of pages crawled. A step that does not succeed prints
    its error rather than passing silently.
    """
    print("\n=== Firecrawl Example ===\n")

    with FirecrawlScraper() as scraper:
        # Scrape a single page
        result = scraper.scrape("https://example.com")

        if result["success"]:
            # `or ''` also covers a 'content' entry that is present but None.
            print(f"Scraped content length: {len(result.get('content') or '')}")
        else:
            # NOTE(review): assumes a failed result carries an 'error' entry,
            # as the Jina example does -- confirm against the scraper.
            print(f"Error: {result.get('error')}")

        # Crawl multiple pages; attempted regardless of the scrape outcome.
        crawl_result = scraper.crawl(
            "https://example.com",
            max_depth=2,
            max_pages=5,
        )

        if crawl_result["success"]:
            print(f"Crawled {crawl_result['total_pages']} pages")
        else:
            print(f"Error: {crawl_result.get('error')}")
|
|
|
|
|
|
def agentql_example():
    """Demonstrate a multi-step login workflow with the AgentQL scraper.

    Builds a list of workflow steps (navigate, fill two form fields, click,
    extract), passes it to ``scrape`` together with the login URL, and prints
    the number of workflow results on success or the reported error otherwise.
    """
    print("\n=== AgentQL Example ===\n")

    with AgentQLScraper() as scraper:
        # Example login workflow. The credentials below are placeholders for
        # illustration only; do not put real secrets in source code.
        workflow = [
            {"action": "navigate", "params": {"url": "https://example.com/login"}},
            {"action": "fill_form", "params": {"field": "#username", "value": "user@example.com"}},
            {"action": "fill_form", "params": {"field": "#password", "value": "password123"}},
            {"action": "click", "params": {"element": "#submit"}},
            {"action": "extract", "params": {"selector": ".dashboard-content"}},
        ]

        result = scraper.scrape("https://example.com/login", workflow)

        if result["success"]:
            print(f"Workflow executed: {len(result['workflow_results'])} steps")
        else:
            # NOTE(review): assumes a failed result carries an 'error' entry,
            # as the Jina example does -- confirm against the scraper.
            print(f"Error: {result.get('error')}")
|
|
|
|
|
|
def multion_example():
    """Demonstrate an exploratory "find the best deal" task with Multion.

    Calls ``find_best_deal`` for "wireless headphones" with a price and
    rating filter, then prints the final result on success or the reported
    error otherwise.
    """
    print("\n=== Multion Example ===\n")

    with MultionScraper() as scraper:
        # Example: Find best deal
        result = scraper.find_best_deal(
            search_query="wireless headphones",
            filters={"max_price": 100, "rating": "4+"},
        )

        if result["success"]:
            print(f"Task result: {result.get('final_result')}")
        else:
            # NOTE(review): assumes a failed result carries an 'error' entry,
            # as the Jina example does -- confirm against the scraper.
            print(f"Error: {result.get('error')}")
|
|
|
|
|
|
if __name__ == "__main__":
    # These examples need API keys to be configured in a .env file.

    # Banner: title line followed by a 50-character rule.
    print("Advanced Scraping Examples", "=" * 50, sep="\n")

    # Uncomment whichever examples should run:

    # jina_example()
    # firecrawl_example()
    # agentql_example()
    # multion_example()

    print("\nNote: Set API keys in .env file to run these examples")
|
|
|