64 lines
1.8 KiB
Python
64 lines
1.8 KiB
Python
"""
|
|
Tests for BasicScraper.
|
|
"""
|
|
import pytest
|
|
from scrapers.basic_scraper import BasicScraper
|
|
|
|
|
|
def test_basic_scraper_initialization():
    """Test BasicScraper initialization.

    Builds a scraper directly (without the ``with`` form) and checks that the
    instance and its ``session`` attribute are both set.  ``cleanup()`` is
    called from a ``finally`` clause so that a failing assertion cannot leave
    the scraper un-cleaned.
    """
    scraper = BasicScraper()
    try:
        assert scraper is not None
        assert scraper.session is not None
    finally:
        # Runs whether or not the assertions above passed.
        scraper.cleanup()
|
|
|
|
|
|
def test_basic_scrape_success():
    """Scrape a static page and verify the fields of the returned result."""
    target = "http://quotes.toscrape.com/"
    with BasicScraper() as scraper:
        outcome = scraper.scrape(target)

        # The result must flag success and carry a 200 status code.
        assert outcome["success"] is True
        assert outcome["status_code"] == 200

        # Both payload entries must be present in the result.
        for key in ("html", "soup"):
            assert key in outcome
        assert outcome["soup"] is not None
|
|
|
|
|
|
def test_basic_scrape_failure():
    """Test scraping a URL whose host cannot be resolved.

    The host name lives under the reserved ``.invalid`` top-level domain
    (RFC 2606 / RFC 6761), which is guaranteed never to be registered.  A
    regular ``.com`` name could be registered by anyone at any time, which
    would make this test start failing for reasons unrelated to the scraper.

    The result is expected to report ``success`` as False and to contain an
    ``"error"`` entry.
    """
    with BasicScraper() as scraper:
        result = scraper.scrape("http://invalid-url-that-does-not-exist.invalid/")

        assert result["success"] is False
        assert "error" in result
|
|
|
|
|
|
def test_extract_text():
    """Test text extraction from BeautifulSoup object.

    Scrapes the quotes page, then checks that
    ``extract_text(soup, ".text")`` returns a non-empty sequence whose first
    item is a ``str``.

    If the page cannot be fetched the test is reported as skipped.  Guarding
    the assertions with a plain ``if`` would let the test pass without
    checking anything whenever the fetch fails.
    """
    with BasicScraper() as scraper:
        result = scraper.scrape("http://quotes.toscrape.com/")

        if not result["success"]:
            # Make the missing precondition visible instead of passing vacuously.
            pytest.skip("could not fetch http://quotes.toscrape.com/")

        texts = scraper.extract_text(result["soup"], ".text")
        assert len(texts) > 0
        assert isinstance(texts[0], str)
|
|
|
|
|
|
def test_extract_links():
    """Test link extraction.

    Scrapes the quotes page, then checks that ``extract_links`` called with
    ``base_url`` set to the page URL returns at least one link and that every
    returned link starts with ``"http"``.

    If the page cannot be fetched the test is reported as skipped.  Guarding
    the assertions with a plain ``if`` would let the test pass without
    checking anything whenever the fetch fails.
    """
    with BasicScraper() as scraper:
        result = scraper.scrape("http://quotes.toscrape.com/")

        if not result["success"]:
            # Make the missing precondition visible instead of passing vacuously.
            pytest.skip("could not fetch http://quotes.toscrape.com/")

        links = scraper.extract_links(
            result["soup"],
            base_url="http://quotes.toscrape.com/",
        )
        assert len(links) > 0
        assert all(link.startswith("http") for link in links)
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # that running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
|
|