From 2cf6e464221fe226f4ed6f803b72372ea879c72e Mon Sep 17 00:00:00 2001 From: kingbri Date: Fri, 31 Mar 2023 22:47:13 -0400 Subject: [PATCH] Sources: Improve regex and require title Titles are now required as an entry without a title shouldn't be featured. Support via regex is now added for matching along with splicing strings via capture groups. If a capture group isn't present, assume that a contains check is occurring. Also migrate back to searchText being located in scrapingModel. Signed-off-by: kingbri --- Ferrite/Models/SourceModels.swift | 6 +- Ferrite/ViewModels/PluginManager.swift | 40 ++--- Ferrite/ViewModels/ScrapingViewModel.swift | 155 +++++++++++------- .../SearchResult/SearchResultsView.swift | 4 +- Ferrite/Views/ContentView.swift | 7 +- 5 files changed, 118 insertions(+), 94 deletions(-) diff --git a/Ferrite/Models/SourceModels.swift b/Ferrite/Models/SourceModels.swift index 1f2b278..ee7290d 100644 --- a/Ferrite/Models/SourceModels.swift +++ b/Ferrite/Models/SourceModels.swift @@ -63,10 +63,10 @@ public struct SourceJsonParserJson: Codable, Hashable, Sendable { let searchUrl: String let results: String? let subResults: String? + let title: SourceComplexQueryJson let magnetHash: SourceComplexQueryJson? let magnetLink: SourceComplexQueryJson? let subName: SourceComplexQueryJson? - let title: SourceComplexQueryJson? let size: SourceComplexQueryJson? let sl: SourceSLJson? } @@ -75,10 +75,10 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable { let rssUrl: String? let searchUrl: String let items: String + let title: SourceComplexQueryJson let magnetHash: SourceComplexQueryJson? let magnetLink: SourceComplexQueryJson? let subName: SourceComplexQueryJson? - let title: SourceComplexQueryJson? let size: SourceComplexQueryJson? let sl: SourceSLJson? } @@ -86,9 +86,9 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable { public struct SourceHtmlParserJson: Codable, Hashable, Sendable { let searchUrl: String let rows: String + let title: SourceComplexQueryJson let magnet: SourceMagnetJson let subName: SourceComplexQueryJson? - let title: SourceComplexQueryJson? let size: SourceComplexQueryJson? let sl: SourceSLJson? } diff --git a/Ferrite/ViewModels/PluginManager.swift b/Ferrite/ViewModels/PluginManager.swift index 39d7c2e..97036b2 100644 --- a/Ferrite/ViewModels/PluginManager.swift +++ b/Ferrite/ViewModels/PluginManager.swift @@ -584,14 +584,12 @@ public class PluginManager: ObservableObject { newSourceJsonParser.subName = newSourceSubName } - if let titleJson = jsonParserJson.title { - let newSourceTitle = SourceTitle(context: backgroundContext) - newSourceTitle.query = titleJson.query - newSourceTitle.attribute = titleJson.attribute ?? "text" - newSourceTitle.discriminator = titleJson.discriminator + let newSourceTitle = SourceTitle(context: backgroundContext) + newSourceTitle.query = jsonParserJson.title.query + newSourceTitle.attribute = jsonParserJson.title.attribute ?? "text" + newSourceTitle.discriminator = jsonParserJson.title.discriminator - newSourceJsonParser.title = newSourceTitle - } + newSourceJsonParser.title = newSourceTitle if let sizeJson = jsonParserJson.size { let newSourceSize = SourceSize(context: backgroundContext) @@ -656,15 +654,13 @@ public class PluginManager: ObservableObject { newSourceRssParser.subName = newSourceSubName } - if let titleJson = rssParserJson.title { - let newSourceTitle = SourceTitle(context: backgroundContext) - newSourceTitle.query = titleJson.query - newSourceTitle.attribute = titleJson.attribute ?? "text" - newSourceTitle.discriminator = titleJson.discriminator - newSourceTitle.regex = titleJson.regex + let newSourceTitle = SourceTitle(context: backgroundContext) + newSourceTitle.query = rssParserJson.title.query + newSourceTitle.attribute = rssParserJson.title.attribute ?? "text" + newSourceTitle.discriminator = rssParserJson.title.discriminator + newSourceTitle.regex = rssParserJson.title.regex - newSourceRssParser.title = newSourceTitle - } + newSourceRssParser.title = newSourceTitle if let sizeJson = rssParserJson.size { let newSourceSize = SourceSize(context: backgroundContext) @@ -708,15 +704,13 @@ public class PluginManager: ObservableObject { newSourceHtmlParser.subName = newSourceSubName } - // Adds a title complex query if present - if let titleJson = htmlParserJson.title { - let newSourceTitle = SourceTitle(context: backgroundContext) - newSourceTitle.query = titleJson.query - newSourceTitle.attribute = titleJson.attribute ?? "text" - newSourceTitle.regex = titleJson.regex + // Adds a title complex query + let newSourceTitle = SourceTitle(context: backgroundContext) + newSourceTitle.query = htmlParserJson.title.query + newSourceTitle.attribute = htmlParserJson.title.attribute ?? "text" + newSourceTitle.regex = htmlParserJson.title.regex - newSourceHtmlParser.title = newSourceTitle - } + newSourceHtmlParser.title = newSourceTitle // Adds a size complex query if present if let sizeJson = htmlParserJson.size { diff --git a/Ferrite/ViewModels/ScrapingViewModel.swift b/Ferrite/ViewModels/ScrapingViewModel.swift index 8a2bfad..f125ad1 100644 --- a/Ferrite/ViewModels/ScrapingViewModel.swift +++ b/Ferrite/ViewModels/ScrapingViewModel.swift @@ -22,6 +22,7 @@ class ScrapingViewModel: ObservableObject { runningSearchTask = nil } + @Published var searchText: String = "" @Published var searchResults: [SearchResult] = [] // Only add results with valid magnet hashes to the search results array @@ -67,7 +68,7 @@ class ScrapingViewModel: ObservableObject { await logManager?.error(description, showToast: false) } - public func scanSources(sources: [Source], searchText: String, debridManager: DebridManager) async { + public func scanSources(sources: [Source], debridManager: DebridManager) async { await logManager?.info("Started scanning sources for query \"\(searchText)\"") if sources.isEmpty { @@ -101,7 +102,7 @@ class ScrapingViewModel: ObservableObject { if source.enabled { group.addTask { await self.updateCurrentSourceNames(source.name) - let requestResult = await self.executeParser(source: source, searchText: searchText) + let requestResult = await self.executeParser(source: source) return (requestResult, source.name) } @@ -142,7 +143,7 @@ class ScrapingViewModel: ObservableObject { } } - func executeParser(source: Source, searchText: String) async -> SearchRequestResult? { + func executeParser(source: Source) async -> SearchRequestResult? { guard let baseUrl = source.baseUrl else { await logManager?.error("Scraping: The base URL could not be found for source \(source.name)") @@ -364,7 +365,7 @@ class ScrapingViewModel: ObservableObject { // Fetches the data for a URL public func fetchWebsiteData(urlString: String, sourceName: String) async -> Data? { - guard let url = URL(string: urlString) else { + guard let url = URL(string: urlString.trimmingCharacters(in: .whitespacesAndNewlines)) else { await sendSourceError("\(sourceName): Source doesn't contain a valid URL, contact the source dev!") return nil @@ -467,11 +468,36 @@ class ScrapingViewModel: ObservableObject { return SearchRequestResult(results: tempResults, magnets: magnets) } + // TODO: Add regex parsing for API public func parseJsonResult(_ result: JSON, jsonParser: SourceJsonParser, source: Source, existingSearchResult: SearchResult? = nil) -> SearchResult? { + // Enforce these parsers + guard let titleParser = jsonParser.title else { + return nil + } + + var title: String? = existingSearchResult?.title + if let existingTitle = title, + let discriminatorQuery = titleParser.discriminator + { + let rawDiscriminator = result[discriminatorQuery.components(separatedBy: ".")].rawValue + + if !(rawDiscriminator is NSNull) { + title = String(describing: rawDiscriminator) + existingTitle + } + } else if title == nil { + let rawTitle = result[titleParser.query].rawValue + title = rawTitle is NSNull ? nil : String(describing: rawTitle) + } + + // Return if a title doesn't exist + if title == nil { + return nil + } + var magnetHash: String? = existingSearchResult?.magnet.hash if let magnetHashParser = jsonParser.magnetHash { let rawHash = result[magnetHashParser.query.components(separatedBy: ".")].rawValue @@ -487,23 +513,7 @@ class ScrapingViewModel: ObservableObject { link = rawLink is NSNull ? nil : String(describing: rawLink) } - var title: String? = existingSearchResult?.title - if let titleParser = jsonParser.title { - if let existingTitle = existingSearchResult?.title, - let discriminatorQuery = titleParser.discriminator - { - let rawDiscriminator = result[discriminatorQuery.components(separatedBy: ".")].rawValue - - if !(rawDiscriminator is NSNull) { - title = String(describing: rawDiscriminator) + existingTitle - } - } else if existingSearchResult?.title == nil { - let rawTitle = result[titleParser.query].rawValue - title = rawTitle is NSNull ? nil : String(describing: rawTitle) - } - } - - // Return if no magnet hash exists + // Return if a magnet hash doesn't exist let magnet = Magnet(hash: magnetHash, link: link, title: title, trackers: source.trackers) if magnet.hash == nil { return nil @@ -573,6 +583,21 @@ class ScrapingViewModel: ObservableObject { var magnets: [Magnet] = [] for item in items { + // Enforce these parsers + guard let titleParser = rssParser.title else { + continue + } + + guard let title = try? runRssComplexQuery( + item: item, + query: titleParser.query, + attribute: titleParser.attribute, + discriminator: titleParser.discriminator, + regexString: titleParser.regex + ) else { + continue + } + // Parse magnet link or translate hash var magnetHash: String? if let magnetHashParser = rssParser.magnetHash { @@ -596,17 +621,6 @@ class ScrapingViewModel: ObservableObject { ) } - var title: String? - if let titleParser = rssParser.title { - title = try? runRssComplexQuery( - item: item, - query: titleParser.query, - attribute: titleParser.attribute, - discriminator: titleParser.discriminator, - regexString: titleParser.regex - ) - } - // Fetches the subName for the source if there is one var subName: String? if let subNameParser = rssParser.subName { @@ -666,7 +680,7 @@ class ScrapingViewModel: ObservableObject { } let result = SearchResult( - title: title ?? "No title", + title: title, source: subName.map { "\(source.name) - \($0)" } ?? source.name, size: size ?? "", magnet: magnet, @@ -708,10 +722,8 @@ class ScrapingViewModel: ObservableObject { // A capture group must be used in the provided regex if let regexString, - let parsedValue, - let regexValue = try? Regex(regexString).firstMatch(in: parsedValue)?.groups[safe: 0]?.value - { - return regexValue + let parsedValue { + return runRegex(parsedValue: parsedValue, regexString: regexString) } else { return parsedValue } @@ -740,18 +752,37 @@ class ScrapingViewModel: ObservableObject { // If there's an error, continue instead of returning with nothing for row in rows { do { - // Fetches the magnet link - // If the magnet is located on an external page, fetch the external page and grab the magnet link - // External page fetching affects source performance - guard let magnetParser = htmlParser.magnetLink else { + // Enforce these parsers + guard + let magnetParser = htmlParser.magnetLink, + let titleParser = htmlParser.title + else { continue } + // Fetches the episode/movie title + // Place here for filtering purposes + guard let title = try? runHtmlComplexQuery( + row: row, + query: titleParser.query, + attribute: titleParser.attribute, + regexString: titleParser.regex + ) else { + continue + } + + // Fetches the magnet link + // If the magnet is located on an external page, fetch the external page and grab the magnet link + // External page fetching affects source performance var href: String if let externalMagnetQuery = magnetParser.externalLinkQuery, !externalMagnetQuery.isEmpty { + guard let externalMagnetUrl = try row.select(externalMagnetQuery).first()?.attr("href") else { + continue + } + + let replacedMagnetUrl = externalMagnetUrl.starts(with: "/") ? baseUrl + externalMagnetUrl : externalMagnetUrl guard - let externalMagnetLink = try row.select(externalMagnetQuery).first()?.attr("href"), - let data = await fetchWebsiteData(urlString: baseUrl + externalMagnetLink, sourceName: source.name), + let data = await fetchWebsiteData(urlString: replacedMagnetUrl, sourceName: source.name), let magnetHtml = String(data: data, encoding: .utf8) else { continue @@ -786,17 +817,6 @@ class ScrapingViewModel: ObservableObject { continue } - // Fetches the episode/movie title - var title: String? - if let titleParser = htmlParser.title { - title = try? runHtmlComplexQuery( - row: row, - query: titleParser.query, - attribute: titleParser.attribute, - regexString: titleParser.regex - ) - } - var subName: String? if let subNameParser = htmlParser.subName { subName = try? runHtmlComplexQuery( @@ -859,7 +879,7 @@ class ScrapingViewModel: ObservableObject { } let result = SearchResult( - title: title ?? "No title", + title: title, source: subName.map { "\(source.name) - \($0)" } ?? source.name, size: size ?? "", magnet: magnet, @@ -898,12 +918,25 @@ class ScrapingViewModel: ObservableObject { parsedValue = try result?.attr(attribute) } - // A capture group must be used in the provided regex - if let regexString, - let parsedValue, - let regexValue = try? Regex(regexString).firstMatch(in: parsedValue)?.groups[safe: 0]?.value - { - return regexValue + if let parsedValue, + let regexString { + return runRegex(parsedValue: parsedValue, regexString: regexString) + } else { + return parsedValue + } + } + + func runRegex(parsedValue: String, regexString: String) -> String? { + let replacedRegexString = regexString + .replacingOccurrences(of: "{query}", with: searchText) + + guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else { + return nil + } + + // Is there a capture group present? Otherwise return the original matched string + if let group = matchedRegex.groups[safe: 0] { + return group.value } else { return parsedValue } diff --git a/Ferrite/Views/ComponentViews/SearchResult/SearchResultsView.swift b/Ferrite/Views/ComponentViews/SearchResult/SearchResultsView.swift index a101209..e812852 100644 --- a/Ferrite/Views/ComponentViews/SearchResult/SearchResultsView.swift +++ b/Ferrite/Views/ComponentViews/SearchResult/SearchResultsView.swift @@ -16,8 +16,6 @@ struct SearchResultsView: View { @AppStorage("Behavior.UsesRandomSearchText") var usesRandomSearchText: Bool = false - @Binding var searchText: String - @Binding var searchPrompt: String @State private var lastSearchPromptIndex: Int = -1 let searchBarTextArray: [String] = [ @@ -38,7 +36,7 @@ struct SearchResultsView: View { .onAppear { searchPrompt = getSearchPrompt() } - .onChange(of: searchText) { newText in + .onChange(of: scrapingModel.searchText) { newText in if newText.isEmpty, isSearching { searchPrompt = getSearchPrompt() } diff --git a/Ferrite/Views/ContentView.swift b/Ferrite/Views/ContentView.swift index a6fc8c6..258efb8 100644 --- a/Ferrite/Views/ContentView.swift +++ b/Ferrite/Views/ContentView.swift @@ -16,18 +16,18 @@ struct ContentView: View { @AppStorage("Behavior.AutocorrectSearch") var autocorrectSearch: Bool = false - @State private var searchText: String = "" + // TODO: Fix searchPrompt updating @State private var searchPrompt: String = "Search" var body: some View { NavView { List { - SearchResultsView(searchText: $searchText, searchPrompt: $searchPrompt) + SearchResultsView(searchPrompt: $searchPrompt) } .listStyle(.insetGrouped) .inlinedList(inset: 20) .navigationTitle("Search") - .searchable(text: $searchText, placement: .navigationBarDrawer(displayMode: .always), prompt: Text(searchPrompt)) + .searchable(text: $scrapingModel.searchText, placement: .navigationBarDrawer(displayMode: .always), prompt: Text(searchPrompt)) .onSubmit(of: .search) { if let runningSearchTask = scrapingModel.runningSearchTask, runningSearchTask.isCancelled { scrapingModel.runningSearchTask = nil @@ -38,7 +38,6 @@ struct ContentView: View { let sources = pluginManager.fetchInstalledSources() await scrapingModel.scanSources( sources: sources, - searchText: searchText, debridManager: debridManager )