From 51366f32157b3ada5cae87543dabe4d7d2ef3d90 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 1 Apr 2023 23:19:15 -0400 Subject: [PATCH] Sources: Don't require searchUrl in HTML parser searchUrl used to be a required variable in HTML parsers, but some HTML sources can be single-page, which means that a search URL isn't required. Also make regex matching case-insensitive and let anchors match at line boundaries. Signed-off-by: kingbri --- .../SourceHtmlParser+CoreDataProperties.swift | 2 +- .../FerriteDB_v2.xcdatamodel/contents | 2 +- Ferrite/Models/SourceModels.swift | 2 +- Ferrite/ViewModels/ScrapingViewModel.swift | 26 +++++++++++++------ 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/Ferrite/DataManagement/Classes/SourceHtmlParser+CoreDataProperties.swift b/Ferrite/DataManagement/Classes/SourceHtmlParser+CoreDataProperties.swift index e5f19bd..11f9202 100644 --- a/Ferrite/DataManagement/Classes/SourceHtmlParser+CoreDataProperties.swift +++ b/Ferrite/DataManagement/Classes/SourceHtmlParser+CoreDataProperties.swift @@ -15,7 +15,7 @@ public extension SourceHtmlParser { } @NSManaged var rows: String - @NSManaged var searchUrl: String + @NSManaged var searchUrl: String? @NSManaged var magnetHash: SourceMagnetHash? @NSManaged var magnetLink: SourceMagnetLink? @NSManaged var parentSource: Source? 
diff --git a/Ferrite/DataManagement/FerriteDB.xcdatamodeld/FerriteDB_v2.xcdatamodel/contents b/Ferrite/DataManagement/FerriteDB.xcdatamodeld/FerriteDB_v2.xcdatamodel/contents index 927047b..21b7c00 100644 --- a/Ferrite/DataManagement/FerriteDB.xcdatamodeld/FerriteDB_v2.xcdatamodel/contents +++ b/Ferrite/DataManagement/FerriteDB.xcdatamodeld/FerriteDB_v2.xcdatamodel/contents @@ -99,7 +99,7 @@ - + diff --git a/Ferrite/Models/SourceModels.swift b/Ferrite/Models/SourceModels.swift index ee7290d..6384c72 100644 --- a/Ferrite/Models/SourceModels.swift +++ b/Ferrite/Models/SourceModels.swift @@ -84,7 +84,7 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable { } public struct SourceHtmlParserJson: Codable, Hashable, Sendable { - let searchUrl: String + let searchUrl: String? let rows: String let title: SourceComplexQueryJson let magnet: SourceMagnetJson diff --git a/Ferrite/ViewModels/ScrapingViewModel.swift b/Ferrite/ViewModels/ScrapingViewModel.swift index f125ad1..d75cb59 100644 --- a/Ferrite/ViewModels/ScrapingViewModel.swift +++ b/Ferrite/ViewModels/ScrapingViewModel.swift @@ -153,7 +153,7 @@ class ScrapingViewModel: ObservableObject { // Default to HTML scraping let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? 
.none - guard let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else { + guard let encodedQuery = searchText.lowercased().addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else { await sendSourceError("\(source.name): Could not process search query, invalid characters present.") return nil @@ -162,8 +162,9 @@ class ScrapingViewModel: ObservableObject { switch preferredParser { case .scraping: if let htmlParser = source.htmlParser { - let replacedSearchUrl = htmlParser.searchUrl - .replacingOccurrences(of: "{query}", with: encodedQuery) + let replacedSearchUrl = htmlParser.searchUrl.map { + $0.replacingOccurrences(of: "{query}", with: encodedQuery) + } let data = await handleUrls( baseUrl: baseUrl, @@ -260,14 +261,16 @@ class ScrapingViewModel: ObservableObject { } // Checks the base URL for any website data then iterates through the fallback URLs - func handleUrls(baseUrl: String, replacedSearchUrl: String, fallbackUrls: [String]?, sourceName: String) async -> Data? { - if let data = await fetchWebsiteData(urlString: baseUrl + replacedSearchUrl, sourceName: sourceName) { + func handleUrls(baseUrl: String, replacedSearchUrl: String?, fallbackUrls: [String]?, sourceName: String) async -> Data? { + let fetchUrl = baseUrl + (replacedSearchUrl.map { $0 } ?? "") + if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) { return data } if let fallbackUrls { for fallbackUrl in fallbackUrls { - if let data = await fetchWebsiteData(urlString: fallbackUrl + replacedSearchUrl, sourceName: sourceName) { + let fetchUrl = fallbackUrl + (replacedSearchUrl.map { $0 } ?? "") + if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) { return data } } @@ -927,10 +930,17 @@ class ScrapingViewModel: ObservableObject { } func runRegex(parsedValue: String, regexString: String) -> String? 
{ + // TODO: Maybe dynamically parse flags let replacedRegexString = regexString - .replacingOccurrences(of: "{query}", with: searchText) + .replacingOccurrences(of: "{query}", with: searchText.lowercased()) - guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else { + guard + let matchedRegex = try? Regex( + replacedRegexString, + options: [.caseInsensitive, .anchorsMatchLines] + ) + .firstMatch(in: parsedValue) + else { return nil }