Sources are now completely changed to use a more flexible API. This uses a fully native source system, so there will be 0 overhead on resource usage and performance. JSON objects specify what is fetched and displayed by Ferrite when searching torrents. Sources now include sizes, seeders, and leechers for any site that specifies them. The versioning and repo naming framework has been added, but will be displayed in another update. API support will be included in another update. Signed-off-by: kingbri <bdashore3@gmail.com>
287 lines
10 KiB
Swift
287 lines
10 KiB
Swift
//
|
|
// ScrapingViewModel.swift
|
|
// Ferrite
|
|
//
|
|
// Created by Brian Dashore on 7/4/22.
|
|
//
|
|
|
|
import Base32
|
|
import Regex
|
|
import SwiftSoup
|
|
import SwiftUI
|
|
|
|
/// A single torrent entry produced by scraping one row of a source's search page.
///
/// All values are kept as the raw strings extracted from the page; nothing is
/// parsed into numbers or byte counts at this level.
public struct SearchResult: Codable, Hashable {
    /// Display title of the torrent.
    let title: String

    /// Name of the source this result was scraped from.
    let source: String

    /// Size text as shown by the source.
    let size: String

    /// Full `magnet:` link for the torrent.
    let magnetLink: String

    /// Hash extracted from `magnetLink`, when one could be determined.
    let magnetHash: String?

    /// Seeder count text, when the source provides it.
    let seeders: String?

    /// Leecher count text, when the source provides it.
    let leechers: String?
}
|
|
|
|
/// View model that searches the configured sources and scrapes torrent results
/// out of the HTML they return.
///
/// Sources describe, through their HTML parser, which CSS queries, attributes and
/// optional regexes locate the magnet link, title, size, seeders and leechers
/// inside each result row.
class ScrapingViewModel: ObservableObject {
    /// Persisted flag stored under the `RealDebrid.Enabled` key.
    @AppStorage("RealDebrid.Enabled") var realDebridEnabled = false

    // Link the toast view model for single-directional communication
    var toastModel: ToastViewModel?

    /// Results of the most recent scan, replaced as a whole when a scan finishes.
    @Published var searchResults: [SearchResult] = []

    /// The query that `scanSources(sources:)` substitutes into each source's search URL.
    @Published var searchText: String = ""

    @Published var selectedSearchResult: SearchResult?
    @Published var filteredSource: Source?

    /// Searches every enabled source that has an HTML parser and publishes the
    /// combined results to `searchResults`.
    ///
    /// Sources are queried one after another. A source that fails to load or
    /// parse is skipped and the remaining sources are still scanned.
    ///
    /// - Parameter sources: All known sources; disabled ones are ignored.
    @MainActor
    public func scanSources(sources: [Source]) async {
        if sources.isEmpty {
            print("Sources empty")
            return
        }

        let scrapableSources = sources.filter { $0.enabled && $0.htmlParser != nil }

        guard !scrapableSources.isEmpty else {
            searchResults = []
            return
        }

        // Capture and encode the query once, before the first suspension point.
        // Re-reading `searchText` per source would let a query edited mid-scan
        // produce a result list that mixes two different searches.
        guard let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
            toastModel?.toastDescription = "Could not process search query, invalid characters present."
            print("Could not process search query, invalid characters present")

            searchResults = []
            return
        }

        var tempResults: [SearchResult] = []

        for source in scrapableSources {
            guard let htmlParser = source.htmlParser else {
                continue
            }

            let urlString = source.baseUrl + htmlParser.searchUrl.replacingOccurrences(of: "{query}", with: encodedQuery)

            guard let html = await fetchWebsiteHtml(urlString: urlString) else {
                continue
            }

            tempResults += await scrapeWebsite(source: source, html: html)
        }

        searchResults = tempResults
    }

    /// Fetches the HTML for a URL.
    ///
    /// Failures are reported through the toast model and the console.
    ///
    /// - Parameter urlString: Absolute URL of the page to download.
    /// - Returns: The page body as text, or `nil` when the URL is invalid, the
    ///   request fails, or the body cannot be decoded.
    @MainActor
    public func fetchWebsiteHtml(urlString: String) async -> String? {
        guard let url = URL(string: urlString) else {
            toastModel?.toastDescription = "Source doesn't contain a valid URL, contact the source dev!"
            print("Source doesn't contain a valid URL, contact the source dev!")

            return nil
        }

        do {
            let (data, _) = try await URLSession.shared.data(from: url)

            // Pages routinely contain non-ASCII text (for example in titles), so decode
            // as UTF-8 first and fall back to ISO Latin-1 for bodies that are not valid UTF-8.
            return String(data: data, encoding: .utf8) ?? String(data: data, encoding: .isoLatin1)
        } catch {
            toastModel?.toastDescription = "Error in fetching HTML \(error)"
            print("Error in fetching HTML \(error)")

            return nil
        }
    }

    /// Scrapes search results out of a source's HTML.
    ///
    /// A row only becomes a result when a link starting with `magnet:` is found
    /// for it. The title falls back to "No title" and the size to an empty
    /// string; seeders and leechers are optional.
    ///
    /// - Parameters:
    ///   - source: The source whose HTML parser describes how to read `html`.
    ///   - html: The search page markup.
    /// - Returns: The results found, or an empty array when the source has no
    ///   HTML parser, has no magnet parser, or the rows cannot be selected.
    @MainActor
    public func scrapeWebsite(source: Source, html: String) async -> [SearchResult] {
        guard let htmlParser = source.htmlParser else {
            return []
        }

        let rows: Elements

        do {
            let document = try SwiftSoup.parse(html)
            rows = try document.select(htmlParser.rows)
        } catch {
            toastModel?.toastDescription = "Scraping error, couldn't fetch rows: \(error)"
            print("Scraping error, couldn't fetch rows: \(error)")

            return []
        }

        // Without a magnet parser no row can ever produce a result, so bail out once
        // instead of re-checking for every row.
        guard htmlParser.magnet != nil else {
            return []
        }

        var tempResults: [SearchResult] = []

        // If there's an error, continue instead of returning with nothing
        for row in rows {
            do {
                guard
                    let href = try await resolveMagnetLink(row: row, source: source),
                    href.starts(with: "magnet:")
                else {
                    continue
                }

                // Fetches the magnet hash
                let magnetHash = fetchMagnetHash(magnetLink: href)

                // Fetches the episode/movie title
                var title: String?
                if let titleParser = htmlParser.title {
                    title = try? runComplexQuery(
                        row: row,
                        query: titleParser.query,
                        attribute: titleParser.attribute,
                        regexString: titleParser.regex
                    )
                }

                // Fetches the torrent's size
                var size: String?
                if let sizeParser = htmlParser.size {
                    size = try? runComplexQuery(
                        row: row,
                        query: sizeParser.query,
                        attribute: sizeParser.attribute,
                        regexString: sizeParser.regex
                    )
                }

                // Fetches seeders and leechers if there are any
                let peers = parseSeedersAndLeechers(row: row, source: source)

                tempResults.append(
                    SearchResult(
                        title: title ?? "No title",
                        source: source.name,
                        size: size ?? "",
                        magnetLink: href,
                        magnetHash: magnetHash,
                        seeders: peers.seeders,
                        leechers: peers.leechers
                    )
                )
            } catch {
                toastModel?.toastDescription = "Scraping error: \(error)"
                print("Scraping error: \(error)")

                continue
            }
        }

        return tempResults
    }

    /// Finds the magnet link for one result row.
    ///
    /// When the magnet parser names an external link query, the link it selects is
    /// fetched and the magnet is read from that page; this costs one extra request
    /// per row. Otherwise the magnet is read from the row itself.
    ///
    /// - Returns: The extracted link text, or `nil` when it could not be located.
    /// - Throws: Errors raised while selecting or parsing markup.
    @MainActor
    private func resolveMagnetLink(row: Element, source: Source) async throws -> String? {
        guard let magnetParser = source.htmlParser?.magnet else {
            return nil
        }

        guard let externalMagnetQuery = magnetParser.externalLinkQuery, !externalMagnetQuery.isEmpty else {
            return try runComplexQuery(
                row: row,
                query: magnetParser.query,
                attribute: magnetParser.attribute,
                regexString: magnetParser.regex
            )
        }

        // An empty href would make us refetch the source's base page, so skip the row.
        guard
            let externalMagnetLink = try row.select(externalMagnetQuery).first()?.attr("href"),
            !externalMagnetLink.isEmpty
        else {
            return nil
        }

        // Only relative links need the base URL; prefixing an absolute link corrupts it.
        let lowercasedLink = externalMagnetLink.lowercased()
        let isAbsoluteLink = lowercasedLink.hasPrefix("http://") || lowercasedLink.hasPrefix("https://")
        let externalUrlString = isAbsoluteLink ? externalMagnetLink : source.baseUrl + externalMagnetLink

        guard let magnetHtml = await fetchWebsiteHtml(urlString: externalUrlString) else {
            return nil
        }

        let magnetDocument = try SwiftSoup.parse(magnetHtml)
        guard let linkResult = try magnetDocument.select(magnetParser.query).first() else {
            return nil
        }

        if magnetParser.attribute == "text" {
            return try linkResult.text()
        }

        return try linkResult.attr(magnetParser.attribute)
    }

    /// Reads the seeder and leecher values for one result row.
    ///
    /// A source either exposes both values in one combined element, which is split
    /// with the seeder and leecher regexes, or exposes them through separate queries.
    /// Any value that cannot be read is returned as `nil`.
    private func parseSeedersAndLeechers(row: Element, source: Source) -> (seeders: String?, leechers: String?) {
        guard let seederLeecher = source.htmlParser?.seedLeech else {
            return (nil, nil)
        }

        if let combinedQuery = seederLeecher.combined {
            guard
                let combinedString = try? runComplexQuery(
                    row: row,
                    query: combinedQuery,
                    attribute: seederLeecher.attribute,
                    regexString: nil
                ),
                let seederRegex = seederLeecher.seederRegex,
                let leecherRegex = seederLeecher.leecherRegex
            else {
                return (nil, nil)
            }

            // Seeder regex matching
            let seeders: String? = try? Regex(seederRegex).firstMatch(in: combinedString)?.groups[safe: 0]?.value

            // Leecher regex matching
            let leechers: String? = try? Regex(leecherRegex).firstMatch(in: combinedString)?.groups[safe: 0]?.value

            return (seeders, leechers)
        }

        var seeders: String?
        var leechers: String?

        if let seederQuery = seederLeecher.seeders {
            seeders = try? runComplexQuery(
                row: row,
                query: seederQuery,
                attribute: seederLeecher.attribute,
                regexString: seederLeecher.seederRegex
            )
        }

        // Leechers must come from the leecher query; reusing the seeder query here
        // would scrape the seeder element a second time.
        // NOTE(review): assumes the seed/leech parser exposes a `leechers` query
        // alongside `seeders` — confirm against the Source model.
        if let leecherQuery = seederLeecher.leechers {
            leechers = try? runComplexQuery(
                row: row,
                query: leecherQuery,
                attribute: seederLeecher.attribute,
                regexString: seederLeecher.leecherRegex
            )
        }

        return (seeders, leechers)
    }

    /// Selects the first element matching `query` inside `row` and extracts a value from it.
    ///
    /// - Parameters:
    ///   - row: The element to search in.
    ///   - query: Selector passed to `select`.
    ///   - attribute: Attribute to read, or `"text"` to read the element's text.
    ///   - regexString: Optional regex applied to the extracted value. It must
    ///     contain a capture group; the first group of the first match is returned.
    /// - Returns: The captured group when the regex matches, otherwise the raw
    ///   extracted value, or `nil` when no element matched `query`.
    /// - Throws: Errors raised by the selector, the extraction, or the regex.
    func runComplexQuery(row: Element, query: String, attribute: String, regexString: String?) throws -> String? {
        let result = try row.select(query).first()

        let parsedValue: String?
        switch attribute {
        case "text":
            parsedValue = try result?.text()
        default:
            parsedValue = try result?.attr(attribute)
        }

        // A capture group must be used in the provided regex
        if
            let regexString = regexString,
            let parsedValue = parsedValue,
            let regexValue = try Regex(regexString).firstMatch(in: parsedValue)?.groups[safe: 0]?.value
        {
            return regexValue
        }

        return parsedValue
    }

    /// Extracts the hash from a magnet link, converting it to hex when needed.
    ///
    /// The hash is taken from the fourth `:`-separated component of the link, up to
    /// the first `&`. A 32-character value is treated as Base32 and decoded into a
    /// hex string; any other value is returned lowercased.
    ///
    /// - Parameter magnetLink: The full magnet link.
    /// - Returns: The hash, or `nil` when the link does not have the expected
    ///   shape or the Base32 value cannot be decoded.
    public func fetchMagnetHash(magnetLink: String) -> String? {
        guard let firstSplit = magnetLink.split(separator: ":")[safe: 3] else {
            return nil
        }

        guard let magnetHash = firstSplit.split(separator: "&")[safe: 0] else {
            return nil
        }

        // Is this a Base32 hash?
        if magnetHash.count == 32 {
            let decodedMagnetHash = base32DecodeToData(String(magnetHash))
            return decodedMagnetHash?.hexEncodedString()
        }

        return String(magnetHash).lowercased()
    }
}
|