# stremio-sekai/config.py
# 2025-10-31 19:03:17 +01:00
#
# 57 lines
# 1.6 KiB
# Python

"""
Configuration module for web scraping project.
Loads environment variables and defines project-wide settings.
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables via python-dotenv. This must run before the
# os.getenv() lookups further down so that they can see the loaded values.
load_dotenv()
# Filesystem layout: every working directory hangs off the folder that
# contains this module.
BASE_DIR = Path(__file__).resolve().parent
DATA_DIR = BASE_DIR.joinpath("data")
LOGS_DIR = BASE_DIR.joinpath("logs")
CACHE_DIR = BASE_DIR.joinpath("cache")
# Ensure the working directories are present at import time; an already
# existing directory is left untouched.
for _working_dir in (DATA_DIR, LOGS_DIR, CACHE_DIR):
    _working_dir.mkdir(exist_ok=True)
del _working_dir  # keep the loop variable out of the module namespace
# Service credentials. Each key is read from the process environment and
# falls back to an empty string when the variable is not set.
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")
AGENTQL_API_KEY = os.environ.get("AGENTQL_API_KEY", "")
MULTION_API_KEY = os.environ.get("MULTION_API_KEY", "")
TWOCAPTCHA_API_KEY = os.environ.get("TWOCAPTCHA_API_KEY", "")
# Scraping Configuration
def _numeric_env(name, default, cast):
    """Return environment variable *name* converted with *cast*.

    An unset variable and one that is set but blank (for example a bare
    ``TIMEOUT=`` line in a ``.env`` file) both yield ``cast(default)``
    instead of failing on ``cast("")``.

    Raises:
        ValueError: if the variable holds a non-blank value that *cast*
            cannot parse, so a malformed setting still fails at import.
    """
    raw = os.getenv(name, "")
    return cast(raw) if raw.strip() else cast(default)


# Numeric settings, each overridable through an environment variable of the
# same name.
RATE_LIMIT_DELAY = _numeric_env("RATE_LIMIT_DELAY", 2, float)
MAX_RETRIES = _numeric_env("MAX_RETRIES", 3, int)
TIMEOUT = _numeric_env("TIMEOUT", 30, int)
# User-Agent string; the default below is used when USER_AGENT is unset.
USER_AGENT = os.getenv(
    "USER_AGENT",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
# Default request headers. The User-Agent entry reuses USER_AGENT, so a value
# supplied through the environment is reflected here as well.
DEFAULT_HEADERS = {
    # Client identification
    "User-Agent": USER_AGENT,
    # Content negotiation
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    # Privacy and connection handling
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}
# Selenium settings (fixed values, not read from the environment)
SELENIUM_HEADLESS: bool = True
SELENIUM_IMPLICIT_WAIT: int = 10
# Cache settings
CACHE_EXPIRATION: int = 60 * 60  # one hour, expressed in seconds