mirror of
https://github.com/Ferrite-iOS/Ferrite.git
synced 2026-03-11 17:45:40 +00:00
If a user searched after cancelling the search the first time, the first search would still continue. Assign the search task to navigation view and automatically cancel it and dismiss the searchbar when the user switches to a different tab. Also add a ProgressView to show which source is being parsed. Signed-off-by: kingbri <bdashore3@gmail.com>
539 lines
20 KiB
Swift
539 lines
20 KiB
Swift
//
|
|
// ScrapingViewModel.swift
|
|
// Ferrite
|
|
//
|
|
// Created by Brian Dashore on 7/4/22.
|
|
//
|
|
|
|
import Base32
|
|
import Regex
|
|
import SwiftSoup
|
|
import SwiftUI
|
|
|
|
// A single torrent entry produced by one of the scrapers.
//
// The declaration order of the fields defines the memberwise initializer used by the scrapers.
public struct SearchResult: Hashable, Codable {
    // Display title; the scrapers substitute "No title" when none could be parsed
    let title: String

    // Name of the source this result was scraped from
    let source: String

    // Size text as scraped/formatted; the scrapers pass an empty string when no size is available
    let size: String

    // Magnet URI; the scrapers only emit results whose link starts with "magnet:"
    let magnetLink: String

    // Hash taken from the source or derived from the magnet link, if one could be determined
    let magnetHash: String?

    // Seeder count text as scraped, if available
    let seeders: String?

    // Leecher count text as scraped, if available
    let leechers: String?
}
|
|
|
|
class ScrapingViewModel: ObservableObject {
|
|
// Backed by the "RealDebrid.Enabled" app storage key; defaults to false
@AppStorage("RealDebrid.Enabled") var realDebridEnabled = false

// Link the toast view model for single-directional communication
var toastModel: ToastViewModel?

// Shared formatter used to turn raw byte counts into display strings
let byteCountFormatter = ByteCountFormatter()

// Handle of the search task; scanSources inspects it to detect cancellation
@Published var runningSearchTask: Task<Void, Error>?

// Results published by the most recent completed scan
@Published var searchResults = [SearchResult]()

// Query text that scanSources substitutes into each source's search URL
@Published var searchText = ""

@Published var selectedSearchResult: SearchResult?
@Published var filteredSource: Source?

// Name of the source scanSources is currently processing
@Published var currentSourceName: String?
|
|
|
|
// Searches every enabled source for the current search text and publishes the combined results.
//
// Sources are processed one at a time; currentSourceName is updated before each one.
// A source is skipped when its parser is missing, its page cannot be fetched, or its
// preferred parser is .siteApi / .none. If the search is cancelled, nothing is published.
//
// - Parameter sources: The sources to scan. An empty list shows an info toast and returns.
@MainActor
public func scanSources(sources: [Source]) async {
    if sources.isEmpty {
        // This function already runs on the main actor, so the toast can be set directly
        toastModel?.toastType = .info
        toastModel?.toastDescription = "There are no sources to search!"

        print("Sources empty")
        return
    }

    // Snapshot and encode the query once so every source searches for the same text,
    // even if searchText changes while a fetch is suspended.
    // urlPathAllowed leaves "&", "=" and "+" untouched, which would corrupt search URLs
    // that place the query inside a query string, so those are escaped as well.
    var queryAllowedCharacters = CharacterSet.urlPathAllowed
    queryAllowedCharacters.remove(charactersIn: "&=+")
    let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: queryAllowedCharacters)

    var tempResults: [SearchResult] = []

    for source in sources where source.enabled {
        // Stop as soon as the search is cancelled instead of fetching the remaining sources.
        // Task.isCancelled covers the case where runningSearchTask has already been replaced
        // by a newer search, which would otherwise let a stale scan keep running.
        if Task.isCancelled || runningSearchTask?.isCancelled == true {
            return
        }

        currentSourceName = source.name

        // Unknown parser values fall back to .none, which skips the source
        let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? .none

        switch preferredParser {
        case .scraping:
            guard let htmlParser = source.htmlParser else {
                continue
            }

            guard let encodedQuery = encodedQuery else {
                toastModel?.toastDescription = "Could not process search query, invalid characters present."
                print("Could not process search query, invalid characters present")

                continue
            }

            let urlString = source.baseUrl + htmlParser.searchUrl.replacingOccurrences(of: "{query}", with: encodedQuery)

            guard let html = await fetchWebsiteData(urlString: urlString) else {
                continue
            }

            tempResults += await scrapeHtml(source: source, html: html)
        case .rss:
            guard let rssParser = source.rssParser else {
                continue
            }

            guard let encodedQuery = encodedQuery else {
                toastModel?.toastDescription = "Could not process search query, invalid characters present."
                print("Could not process search query, invalid characters present")

                continue
            }

            let replacedSearchUrl = rssParser.searchUrl.replacingOccurrences(of: "{query}", with: encodedQuery)

            // If there is an RSS base URL, use that instead
            let urlString = (rssParser.rssUrl ?? source.baseUrl) + replacedSearchUrl

            guard let rss = await fetchWebsiteData(urlString: urlString) else {
                continue
            }

            tempResults += scrapeRss(source: source, rss: rss)
        case .siteApi, .none:
            continue
        }
    }

    // If the task is cancelled, return without publishing anything
    if Task.isCancelled || runningSearchTask?.isCancelled == true {
        return
    }

    searchResults = tempResults
}
|
|
|
|
// Fetches the data for a URL and returns it as text.
//
// - Parameter urlString: The address to load. An unparsable string shows a toast and returns nil.
// - Returns: The response body decoded as text, or nil when the URL is invalid or the request fails.
//   A cancelled request shows an info toast ("Search cancelled"); any other error shows an error toast.
@MainActor
public func fetchWebsiteData(urlString: String) async -> String? {
    guard let url = URL(string: urlString) else {
        toastModel?.toastDescription = "Source doesn't contain a valid URL, contact the source dev!"
        print("Source doesn't contain a valid URL, contact the source dev!")

        return nil
    }

    do {
        let (data, _) = try await URLSession.shared.data(from: url)

        // Decode as UTF-8 first: strict ASCII cannot represent non-ASCII bytes, so pages with
        // international titles would fail to decode. ISO Latin-1 maps every byte to a character,
        // so it serves as a fallback for pages that are not valid UTF-8.
        return String(data: data, encoding: .utf8) ?? String(data: data, encoding: .isoLatin1)
    } catch {
        let error = error as NSError

        switch error.code {
        case NSURLErrorCancelled:
            // The request was cancelled (e.g. the search task was cancelled); not a real failure
            toastModel?.toastType = .info
            toastModel?.toastDescription = "Search cancelled"
        default:
            toastModel?.toastDescription = "Error in fetching data \(error)"
        }

        print("Error in fetching data \(error)")

        return nil
    }
}
|
|
|
|
// RSS feed scraper
//
// Parses `rss` as XML and builds one SearchResult per <item> that yields a magnet link.
// Returns an empty array when the source has no RSS parser or the document cannot be parsed.
@MainActor
public func scrapeRss(source: Source, rss: String) -> [SearchResult] {
    guard let rssParser = source.rssParser else {
        return []
    }

    let feedItems: Elements

    do {
        feedItems = try SwiftSoup.parse(rss, "", Parser.xmlParser()).getElementsByTag("item")
    } catch {
        toastModel?.toastDescription = "RSS scraping error, couldn't fetch items: \(error)"
        print("RSS scraping error, couldn't fetch items: \(error)")

        return []
    }

    var parsedResults: [SearchResult] = []

    for feedItem in feedItems {
        // Runs one field query against the current item; a thrown error counts as "no value"
        func field(_ query: String, _ attribute: String, _ lookupAttribute: String?, _ regex: String?) -> String? {
            try? runRssComplexQuery(
                item: feedItem,
                query: query,
                attribute: attribute,
                lookupAttribute: lookupAttribute,
                regexString: regex
            )
        }

        // Hash advertised by the feed itself, if the source defines a parser for it
        let feedHash = rssParser.magnetHash.flatMap {
            field($0.query, $0.attribute, $0.lookupAttribute, $0.regex)
        }

        let title = rssParser.title.flatMap {
            field($0.query, $0.attribute, $0.lookupAttribute, $0.regex)
        }

        // Parse the magnet link, or build one from the hash when the source has no link parser
        let candidateLink: String?
        if let linkParser = rssParser.magnetLink {
            candidateLink = field(linkParser.query, linkParser.attribute, linkParser.lookupAttribute, linkParser.regex)
        } else if let knownHash = feedHash {
            candidateLink = generateMagnetLink(magnetHash: knownHash, title: title, trackers: rssParser.trackerArray)
        } else {
            continue
        }

        guard let href = candidateLink, href.starts(with: "magnet:") else {
            continue
        }

        // Derive the hash from the link only when the feed did not provide one
        let resolvedHash = feedHash ?? fetchMagnetHash(magnetLink: href)

        var size = rssParser.size.flatMap {
            field($0.query, $0.attribute, $0.lookupAttribute, $0.regex)
        }

        // A purely numeric size is treated as a byte count and formatted for display
        if let rawSize = size, let byteCount = Int64(rawSize) {
            size = byteCountFormatter.string(fromByteCount: byteCount)
        }

        var seeders: String?
        var leechers: String?

        if let seedLeech = rssParser.seedLeech {
            if let seederQuery = seedLeech.seeders {
                seeders = field(seederQuery, seedLeech.attribute, seedLeech.lookupAttribute, seedLeech.seederRegex)
            }

            if let leecherQuery = seedLeech.leechers {
                leechers = field(leecherQuery, seedLeech.attribute, seedLeech.lookupAttribute, seedLeech.leecherRegex)
            }
        }

        parsedResults.append(
            SearchResult(
                title: title ?? "No title",
                source: source.name,
                size: size ?? "",
                magnetLink: href,
                magnetHash: resolvedHash,
                seeders: seeders,
                leechers: leechers
            )
        )
    }

    return parsedResults
}
|
|
|
|
// HTML scraper
//
// Selects result rows from `html` using the source's HTML parser and builds one SearchResult
// per row that yields a magnet link. Rows that fail are skipped instead of aborting the scrape.
// Returns an empty array when the source has no HTML parser, no magnet link parser, or the
// rows cannot be selected.
@MainActor
public func scrapeHtml(source: Source, html: String) async -> [SearchResult] {
    guard let htmlParser = source.htmlParser else {
        return []
    }

    var rows = Elements()

    do {
        let document = try SwiftSoup.parse(html)
        rows = try document.select(htmlParser.rows)
    } catch {
        toastModel?.toastDescription = "Scraping error, couldn't fetch rows: \(error)"
        print("Scraping error, couldn't fetch rows: \(error)")

        return []
    }

    // Without a magnet link parser no row can produce a result, so there is nothing to iterate
    guard let magnetParser = htmlParser.magnetLink else {
        return []
    }

    var tempResults: [SearchResult] = []

    // If there's an error, continue instead of returning with nothing
    for row in rows {
        do {
            // Fetches the magnet link
            // If the magnet is located on an external page, fetch the external page and grab the magnet link
            // External page fetching affects source performance
            var href: String
            if let externalMagnetQuery = magnetParser.externalLinkQuery, !externalMagnetQuery.isEmpty {
                guard let externalMagnetLink = try row.select(externalMagnetQuery).first()?.attr("href") else {
                    continue
                }

                guard let magnetHtml = await fetchWebsiteData(urlString: source.baseUrl + externalMagnetLink) else {
                    continue
                }

                let magnetDocument = try SwiftSoup.parse(magnetHtml)
                guard let linkResult = try magnetDocument.select(magnetParser.query).first() else {
                    continue
                }

                if magnetParser.attribute == "text" {
                    href = try linkResult.text()
                } else {
                    href = try linkResult.attr(magnetParser.attribute)
                }
            } else {
                guard let link = try runHtmlComplexQuery(
                    row: row,
                    query: magnetParser.query,
                    attribute: magnetParser.attribute,
                    regexString: magnetParser.regex
                ) else {
                    continue
                }

                href = link
            }

            if !href.starts(with: "magnet:") {
                continue
            }

            // Fetches the magnet hash
            let magnetHash = fetchMagnetHash(magnetLink: href)

            // Fetches the episode/movie title
            var title: String?
            if let titleParser = htmlParser.title {
                title = try? runHtmlComplexQuery(
                    row: row,
                    query: titleParser.query,
                    attribute: titleParser.attribute,
                    regexString: titleParser.regex
                )
            }

            // Fetches the torrent's size
            // TODO: Add int translation
            var size: String?
            if let sizeParser = htmlParser.size {
                size = try? runHtmlComplexQuery(
                    row: row,
                    query: sizeParser.query,
                    attribute: sizeParser.attribute,
                    regexString: sizeParser.regex
                )
            }

            // Fetches seeders and leechers if there are any
            var seeders: String?
            var leechers: String?
            if let seederLeecher = htmlParser.seedLeech {
                if let combinedQuery = seederLeecher.combined {
                    // Both counts live in one element: fetch it once and split it with the two regexes
                    if let combinedString = try? runHtmlComplexQuery(
                        row: row,
                        query: combinedQuery,
                        attribute: seederLeecher.attribute,
                        regexString: nil
                    ) {
                        if let seederRegex = seederLeecher.seederRegex, let leecherRegex = seederLeecher.leecherRegex {
                            // Seeder regex matching
                            seeders = try? Regex(seederRegex).firstMatch(in: combinedString)?.groups[safe: 0]?.value

                            // Leecher regex matching
                            leechers = try? Regex(leecherRegex).firstMatch(in: combinedString)?.groups[safe: 0]?.value
                        }
                    }
                } else {
                    if let seederQuery = seederLeecher.seeders {
                        seeders = try? runHtmlComplexQuery(
                            row: row,
                            query: seederQuery,
                            attribute: seederLeecher.attribute,
                            regexString: seederLeecher.seederRegex
                        )
                    }

                    // Use the leecher query here; reusing the seeder query would report
                    // leecher counts read from the seeder element
                    if let leecherQuery = seederLeecher.leechers {
                        leechers = try? runHtmlComplexQuery(
                            row: row,
                            query: leecherQuery,
                            attribute: seederLeecher.attribute,
                            regexString: seederLeecher.leecherRegex
                        )
                    }
                }
            }

            let result = SearchResult(
                title: title ?? "No title",
                source: source.name,
                size: size ?? "",
                magnetLink: href,
                magnetHash: magnetHash,
                seeders: seeders,
                leechers: leechers
            )

            tempResults.append(result)
        } catch {
            toastModel?.toastDescription = "Scraping error: \(error)"
            print("Scraping error: \(error)")

            continue
        }
    }

    return tempResults
}
|
|
|
|
// Complex query parsing for HTML scraping
//
// Selects the first element matching `query` inside `row` and reads either its text
// (when `attribute` is "text") or the named attribute. When `regexString` is given and
// matches, the first capture group is returned instead of the raw value.
func runHtmlComplexQuery(row: Element, query: String, attribute: String, regexString: String?) throws -> String? {
    let matchedElement = try row.select(query).first()

    let rawValue: String?
    if attribute == "text" {
        rawValue = try matchedElement?.text()
    } else {
        rawValue = try matchedElement?.attr(attribute)
    }

    // No regex or no value to run it on: hand back whatever was read
    guard let pattern = regexString, let candidate = rawValue else {
        return rawValue
    }

    // A capture group must be used in the provided regex
    if let captured = try? Regex(pattern).firstMatch(in: candidate)?.groups[safe: 0]?.value {
        return captured
    }

    // An invalid or non-matching regex falls back to the unfiltered value
    return rawValue
}
|
|
|
|
// Complex query parsing for RSS scraping
//
// When `attribute` is "text", reads the text of the first tag named `query` inside `item`.
// Otherwise reads `attribute` from an element found by attribute lookup. When `regexString`
// is given and matches, the first capture group is returned instead of the raw value.
func runRssComplexQuery(item: Element, query: String, attribute: String, lookupAttribute: String?, regexString: String?) throws -> String? {
    let rawValue: String?

    if attribute == "text" {
        rawValue = try item.getElementsByTag(query).first()?.text()
    } else {
        // If there's a key/value to lookup the attribute with, query it.
        // Otherwise assume the value is in the same attribute
        let holder: Element?
        if let lookupAttribute = lookupAttribute {
            holder = try item.getElementsByAttributeValue(lookupAttribute, query).first()
        } else {
            holder = try item.getElementsByAttribute(attribute).first()
        }

        rawValue = try holder?.attr(attribute)
    }

    // No regex or no value to run it on: hand back whatever was read
    guard let pattern = regexString, let candidate = rawValue else {
        return rawValue
    }

    // A capture group must be used in the provided regex
    if let captured = try? Regex(pattern).firstMatch(in: candidate)?.groups[safe: 0]?.value {
        return captured
    }

    // An invalid or non-matching regex falls back to the unfiltered value
    return rawValue
}
|
|
|
|
// Fetches and possibly converts the magnet hash value to sha1
//
// The hash is read from the "xt=urn:btih:" parameter wherever it appears in the link, so
// parameters containing ":" that come before it (e.g. a tracker URL) no longer shift the
// result. Links without that marker fall back to the previous positional lookup
// (fourth ":"-separated component, up to the first "&").
//
// - Parameter magnetLink: The magnet URI to inspect.
// - Returns: For a 32-character hash, the hex encoding of its Base32-decoded bytes;
//   otherwise the hash lowercased. nil when no hash can be located or decoding fails.
public func fetchMagnetHash(magnetLink: String) -> String? {
    let magnetHash: Substring

    if let markerRange = magnetLink.range(of: "xt=urn:btih:", options: .caseInsensitive) {
        // Everything after the marker up to the next parameter separator is the hash
        magnetHash = magnetLink[markerRange.upperBound...].prefix { $0 != "&" }
    } else {
        guard let firstSplit = magnetLink.split(separator: ":")[safe: 3] else {
            return nil
        }

        guard let positionalHash = firstSplit.split(separator: "&")[safe: 0] else {
            return nil
        }

        magnetHash = positionalHash
    }

    if magnetHash.isEmpty {
        return nil
    }

    // A 32 character hash is treated as Base32 encoded and converted to hex
    if magnetHash.count == 32 {
        let decodedMagnetHash = base32DecodeToData(String(magnetHash))
        return decodedMagnetHash?.hexEncodedString()
    } else {
        return String(magnetHash).lowercased()
    }
}
|
|
|
|
// Converts a whole-number byte count string into a decimal KB/MB/GB string.
//
// The unit is chosen from the numeric magnitude rather than the character count, so a
// sign or leading zeros in the input cannot select the wrong unit.
//
// - Parameter sizeString: Text expected to contain only an integer byte count.
// - Returns: The size divided by 1e3/1e6/1e9 followed by " KB"/" MB"/" GB", or nil when
//   the text is not an integer or its magnitude is below 1000.
func parseSizeString(sizeString: String) -> String? {
    // Test if the string can be a full integer
    guard let size = Int(sizeString) else {
        return nil
    }

    let byteCount = Double(size)

    switch size.magnitude {
    case 1_000_000_000...:
        // This is a GB
        return "\(byteCount / 1e9) GB"
    case 1_000_000...:
        // This is a MB
        return "\(byteCount / 1e6) MB"
    case 1_000...:
        // This is a KB
        return "\(byteCount / 1e3) KB"
    default:
        return nil
    }
}
|
|
|
|
// Builds a magnet URI from a hash, an optional display name and a list of trackers.
//
// - Parameters:
//   - magnetHash: Appended verbatim after "magnet:?xt=urn:btih:".
//   - title: Added as the percent-encoded "dn" parameter when present.
//   - trackers: Each tracker whose urlString parses as a URL is added as a percent-encoded "tr" parameter.
// - Returns: The assembled magnet link.
public func generateMagnetLink(magnetHash: String, title: String?, trackers: [SourceTracker]) -> String {
    // urlPathAllowed leaves "&", "=" and "+" unescaped; inside a parameter value an "&" would
    // end the value early and start a bogus parameter, so escape those characters too
    var valueAllowedCharacters = CharacterSet.urlPathAllowed
    valueAllowedCharacters.remove(charactersIn: "&=+")

    var magnetLink = "magnet:?xt=urn:btih:" + magnetHash

    if let title = title, let encodedTitle = title.addingPercentEncoding(withAllowedCharacters: valueAllowedCharacters) {
        magnetLink += "&dn=\(encodedTitle)"
    }

    for tracker in trackers {
        // Skip trackers whose address is not a parsable URL
        if URL(string: tracker.urlString) != nil,
           let encodedUrlString = tracker.urlString.addingPercentEncoding(withAllowedCharacters: valueAllowedCharacters)
        {
            magnetLink += "&tr=\(encodedUrlString)"
        }
    }

    return magnetLink
}
|
|
}
|