# stremio-sekai/tests/test_basic_scraper.py
# Last modified: 2025-10-31 19:03:17 +01:00 (64 lines, 1.8 KiB, Python)
"""
Tests for BasicScraper.
"""
import pytest
from scrapers.basic_scraper import BasicScraper
def test_basic_scraper_initialization():
    """BasicScraper can be constructed directly and torn down with cleanup().

    Exercises the non-context-manager lifecycle: construct, inspect, cleanup.
    """
    scraper = BasicScraper()
    try:
        # The scraper must be usable immediately after construction.
        assert scraper is not None
        assert scraper.session is not None
    finally:
        # Always release the session, even when an assertion above fails;
        # previously a failing assert skipped cleanup() and leaked the session.
        scraper.cleanup()
def test_basic_scrape_success():
    """A scrape of a known static page succeeds with parsed HTML.

    NOTE(review): hits the live quotes.toscrape.com site — requires network.
    """
    with BasicScraper() as scraper:
        outcome = scraper.scrape("http://quotes.toscrape.com/")
        # A successful scrape reports HTTP 200 and carries both the raw
        # markup and a parsed BeautifulSoup object.
        assert outcome["success"] is True
        assert outcome["status_code"] == 200
        for key in ("html", "soup"):
            assert key in outcome
        assert outcome["soup"] is not None
def test_basic_scrape_failure():
    """Scraping an unresolvable host reports failure instead of raising."""
    with BasicScraper() as scraper:
        outcome = scraper.scrape("http://invalid-url-that-does-not-exist.com/")
        # The scraper is expected to trap the network error and surface it
        # in the result dict rather than propagate an exception.
        assert outcome["success"] is False
        assert "error" in outcome
def test_extract_text():
    """extract_text() returns non-empty strings for a CSS selector.

    Skips (rather than silently passing) when the network is unavailable.
    """
    with BasicScraper() as scraper:
        result = scraper.scrape("http://quotes.toscrape.com/")
        if not result["success"]:
            # Previously this test passed vacuously when the scrape failed,
            # hiding real extraction regressions behind network flakiness.
            pytest.skip("quotes.toscrape.com unreachable; cannot exercise extract_text")
        texts = scraper.extract_text(result["soup"], ".text")
        assert len(texts) > 0
        assert isinstance(texts[0], str)
def test_extract_links():
    """extract_links() resolves page links into absolute URLs.

    Skips (rather than silently passing) when the network is unavailable.
    """
    with BasicScraper() as scraper:
        result = scraper.scrape("http://quotes.toscrape.com/")
        if not result["success"]:
            # Previously this test passed vacuously when the scrape failed,
            # hiding real extraction regressions behind network flakiness.
            pytest.skip("quotes.toscrape.com unreachable; cannot exercise extract_links")
        links = scraper.extract_links(
            result["soup"],
            base_url="http://quotes.toscrape.com/"
        )
        assert len(links) > 0
        # base_url resolution should yield absolute http(s) URLs only.
        assert all(link.startswith("http") for link in links)
if __name__ == "__main__":
    # Propagate pytest's exit status so shell/CI callers see failures;
    # the original discarded the return code and always exited 0.
    raise SystemExit(pytest.main([__file__, "-v"]))