Sources: Don't require searchUrl in HTML parser

searchUrl used to be a required property in HTML parsers, but some
HTML sources are a single page, which means that a search URL isn't
required.

Also make regex matching case-insensitive and let the ^ and $ anchors
match at the start and end of each line.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2023-04-01 23:19:15 -04:00
parent 22bec5da52
commit 51366f3215
4 changed files with 21 additions and 11 deletions

View file

@ -15,7 +15,7 @@ public extension SourceHtmlParser {
}
@NSManaged var rows: String
@NSManaged var searchUrl: String
@NSManaged var searchUrl: String?
@NSManaged var magnetHash: SourceMagnetHash?
@NSManaged var magnetLink: SourceMagnetLink?
@NSManaged var parentSource: Source?

View file

@ -99,7 +99,7 @@
</entity>
<entity name="SourceHtmlParser" representedClassName="SourceHtmlParser" syncable="YES">
<attribute name="rows" attributeType="String" defaultValueString=""/>
<attribute name="searchUrl" attributeType="String" defaultValueString=""/>
<attribute name="searchUrl" optional="YES" attributeType="String"/>
<relationship name="magnetHash" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetHash" inverseName="parentHtmlParser" inverseEntity="SourceMagnetHash"/>
<relationship name="magnetLink" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetLink" inverseName="parentHtmlParser" inverseEntity="SourceMagnetLink"/>
<relationship name="parentSource" optional="YES" maxCount="1" deletionRule="Nullify" destinationEntity="Source" inverseName="htmlParser" inverseEntity="Source"/>

View file

@ -84,7 +84,7 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable {
}
public struct SourceHtmlParserJson: Codable, Hashable, Sendable {
let searchUrl: String
let searchUrl: String?
let rows: String
let title: SourceComplexQueryJson
let magnet: SourceMagnetJson

View file

@ -153,7 +153,7 @@ class ScrapingViewModel: ObservableObject {
// Default to HTML scraping
let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? .none
guard let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
guard let encodedQuery = searchText.lowercased().addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
await sendSourceError("\(source.name): Could not process search query, invalid characters present.")
return nil
@ -162,8 +162,9 @@ class ScrapingViewModel: ObservableObject {
switch preferredParser {
case .scraping:
if let htmlParser = source.htmlParser {
let replacedSearchUrl = htmlParser.searchUrl
.replacingOccurrences(of: "{query}", with: encodedQuery)
let replacedSearchUrl = htmlParser.searchUrl.map {
$0.replacingOccurrences(of: "{query}", with: encodedQuery)
}
let data = await handleUrls(
baseUrl: baseUrl,
@ -260,14 +261,16 @@ class ScrapingViewModel: ObservableObject {
}
// Checks the base URL for any website data then iterates through the fallback URLs
func handleUrls(baseUrl: String, replacedSearchUrl: String, fallbackUrls: [String]?, sourceName: String) async -> Data? {
if let data = await fetchWebsiteData(urlString: baseUrl + replacedSearchUrl, sourceName: sourceName) {
func handleUrls(baseUrl: String, replacedSearchUrl: String?, fallbackUrls: [String]?, sourceName: String) async -> Data? {
let fetchUrl = baseUrl + (replacedSearchUrl.map { $0 } ?? "")
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
return data
}
if let fallbackUrls {
for fallbackUrl in fallbackUrls {
if let data = await fetchWebsiteData(urlString: fallbackUrl + replacedSearchUrl, sourceName: sourceName) {
let fetchUrl = fallbackUrl + (replacedSearchUrl.map { $0 } ?? "")
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
return data
}
}
@ -927,10 +930,17 @@ class ScrapingViewModel: ObservableObject {
}
func runRegex(parsedValue: String, regexString: String) -> String? {
// TODO: Maybe dynamically parse flags
let replacedRegexString = regexString
.replacingOccurrences(of: "{query}", with: searchText)
.replacingOccurrences(of: "{query}", with: searchText.lowercased())
guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else {
guard
let matchedRegex = try? Regex(
replacedRegexString,
options: [.caseInsensitive, .anchorsMatchLines]
)
.firstMatch(in: parsedValue)
else {
return nil
}