143 lines
4.3 KiB
Python
143 lines
4.3 KiB
Python
"""
|
|
Multion scraper for unknown/exploratory tasks with AI-driven navigation.
|
|
"""
|
|
from typing import Dict, Any, Optional
|
|
from scrapers.base_scraper import BaseScraper
|
|
from utils.retry import retry_with_backoff
|
|
from config import MULTION_API_KEY
|
|
|
|
|
|
class MultionScraper(BaseScraper):
    """
    Scraper that drives Multion for exploratory and unpredictable tasks.

    Intended for open-ended jobs such as finding the cheapest flights or
    purchasing tickets, where the navigation path is not known in advance.

    Instance attributes assigned in ``__init__``:
        api_key: The explicit key if given, otherwise ``MULTION_API_KEY``.
        client: The imported ``multion`` module, or ``None`` when the
            import fails.
    """

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        """
        Set up the scraper and try to bring up the Multion client.

        Args:
            api_key: Multion API key; falls back to ``MULTION_API_KEY``
                from config when omitted or empty.
            **kwargs: Forwarded unchanged to ``BaseScraper.__init__``.
        """
        super().__init__(**kwargs)

        resolved_key = api_key or MULTION_API_KEY
        self.api_key = resolved_key
        if not resolved_key:
            # A missing key is only warned about; construction continues.
            self.logger.warning("Multion API key not provided. Set MULTION_API_KEY in .env")

        try:
            # Imported lazily so a missing package degrades to client=None
            # instead of breaking module import.
            import multion as multion_module

            self.client = multion_module
            if resolved_key:
                self.client.login(api_key=resolved_key)
            self.logger.info("Multion client initialized")
        except ImportError:
            self.logger.error("Multion library not installed. Install with: pip install multion")
            self.client = None

    @staticmethod
    def _failure_result(url: str, task: str, message: str) -> Dict[str, Any]:
        """Build the result dictionary returned when a task cannot be run."""
        return {
            "url": url,
            "task": task,
            "error": message,
            "success": False,
        }

    @retry_with_backoff(max_retries=2)
    def scrape(
        self,
        url: str,
        task: str,
        max_steps: int = 10,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Execute an exploratory task using Multion AI.

        The current body is a placeholder: it does not call Multion and
        does not consult ``max_steps`` or ``**kwargs``; it returns a fixed
        success payload describing the request.

        Args:
            url: Starting URL.
            task: Natural language description of the task.
            max_steps: Maximum number of steps to execute (currently unused).
            **kwargs: Additional parameters (currently unused).

        Returns:
            A dictionary with ``url``, ``task`` and ``success``. On success
            it also carries ``message``, ``steps_taken`` and
            ``final_result``; on failure it carries ``error`` instead.
        """
        # Guard clause: nothing can run without a client.
        if not self.client:
            return self._failure_result(url, task, "Multion client not initialized")

        self.logger.info(f"Executing Multion task: {task} on {url}")
        self.rate_limiter.wait()

        try:
            # Placeholder implementation - actual Multion API may vary.
            # This demonstrates the intended usage pattern.
            outcome = {
                "url": url,
                "task": task,
                "message": "Multion task execution placeholder",
                "steps_taken": [],
                "final_result": "Task completed successfully",
                "success": True,
            }
            self.logger.info(f"Multion task completed: {task}")
            return outcome
        except Exception as exc:
            # Failures are reported in the result rather than propagated.
            message = str(exc)
            self.logger.error(f"Multion task failed: {message}")
            return self._failure_result(url, task, message)

    def find_best_deal(
        self,
        search_query: str,
        website: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Find the best deal for a product or service.

        Builds a natural-language task from the query (plus any filters,
        rendered as ``key: value`` pairs) and delegates to ``scrape``.

        Args:
            search_query: What to search for.
            website: Optional specific website to search; when omitted or
                empty, ``https://www.google.com`` is used.
            filters: Optional filters (price range, features, etc.).

        Returns:
            The dictionary produced by ``scrape``.
        """
        task_parts = [f"Find the best deal for: {search_query}"]
        if filters:
            rendered = ", ".join(f"{name}: {value}" for name, value in filters.items())
            task_parts.append(f" with filters: {rendered}")

        start_url = website if website else "https://www.google.com"
        return self.scrape(start_url, "".join(task_parts))

    def book_or_purchase(
        self,
        item: str,
        criteria: str,
        website: str
    ) -> Dict[str, Any]:
        """
        Attempt to book or purchase an item based on criteria.

        Args:
            item: What to book/purchase.
            criteria: Purchase criteria (e.g., "cheapest", "earliest").
            website: Website to perform the action on.

        Returns:
            The dictionary produced by ``scrape``.
        """
        return self.scrape(website, f"Book/purchase {item} with criteria: {criteria}")
|
|
|