mirror of
https://github.com/Ferrite-iOS/Ferrite.git
synced 2026-01-11 20:10:27 +00:00
Sources: Don't require searchUrl in HTML parser
searchUrl used to be a required variable in HTML parsers, but some HTML sources are single-page, which means that a search URL isn't required. Also make regex matching case-insensitive and enable the option that lets anchors match at line boundaries. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
parent
22bec5da52
commit
51366f3215
4 changed files with 21 additions and 11 deletions
|
|
@ -15,7 +15,7 @@ public extension SourceHtmlParser {
|
|||
}
|
||||
|
||||
@NSManaged var rows: String
|
||||
@NSManaged var searchUrl: String
|
||||
@NSManaged var searchUrl: String?
|
||||
@NSManaged var magnetHash: SourceMagnetHash?
|
||||
@NSManaged var magnetLink: SourceMagnetLink?
|
||||
@NSManaged var parentSource: Source?
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@
|
|||
</entity>
|
||||
<entity name="SourceHtmlParser" representedClassName="SourceHtmlParser" syncable="YES">
|
||||
<attribute name="rows" attributeType="String" defaultValueString=""/>
|
||||
<attribute name="searchUrl" attributeType="String" defaultValueString=""/>
|
||||
<attribute name="searchUrl" optional="YES" attributeType="String"/>
|
||||
<relationship name="magnetHash" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetHash" inverseName="parentHtmlParser" inverseEntity="SourceMagnetHash"/>
|
||||
<relationship name="magnetLink" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetLink" inverseName="parentHtmlParser" inverseEntity="SourceMagnetLink"/>
|
||||
<relationship name="parentSource" optional="YES" maxCount="1" deletionRule="Nullify" destinationEntity="Source" inverseName="htmlParser" inverseEntity="Source"/>
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable {
|
|||
}
|
||||
|
||||
public struct SourceHtmlParserJson: Codable, Hashable, Sendable {
|
||||
let searchUrl: String
|
||||
let searchUrl: String?
|
||||
let rows: String
|
||||
let title: SourceComplexQueryJson
|
||||
let magnet: SourceMagnetJson
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ class ScrapingViewModel: ObservableObject {
|
|||
// Default to HTML scraping
|
||||
let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? .none
|
||||
|
||||
guard let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
|
||||
guard let encodedQuery = searchText.lowercased().addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
|
||||
await sendSourceError("\(source.name): Could not process search query, invalid characters present.")
|
||||
|
||||
return nil
|
||||
|
|
@ -162,8 +162,9 @@ class ScrapingViewModel: ObservableObject {
|
|||
switch preferredParser {
|
||||
case .scraping:
|
||||
if let htmlParser = source.htmlParser {
|
||||
let replacedSearchUrl = htmlParser.searchUrl
|
||||
.replacingOccurrences(of: "{query}", with: encodedQuery)
|
||||
let replacedSearchUrl = htmlParser.searchUrl.map {
|
||||
$0.replacingOccurrences(of: "{query}", with: encodedQuery)
|
||||
}
|
||||
|
||||
let data = await handleUrls(
|
||||
baseUrl: baseUrl,
|
||||
|
|
@ -260,14 +261,16 @@ class ScrapingViewModel: ObservableObject {
|
|||
}
|
||||
|
||||
// Checks the base URL for any website data then iterates through the fallback URLs
|
||||
func handleUrls(baseUrl: String, replacedSearchUrl: String, fallbackUrls: [String]?, sourceName: String) async -> Data? {
|
||||
if let data = await fetchWebsiteData(urlString: baseUrl + replacedSearchUrl, sourceName: sourceName) {
|
||||
func handleUrls(baseUrl: String, replacedSearchUrl: String?, fallbackUrls: [String]?, sourceName: String) async -> Data? {
|
||||
let fetchUrl = baseUrl + (replacedSearchUrl.map { $0 } ?? "")
|
||||
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
|
||||
return data
|
||||
}
|
||||
|
||||
if let fallbackUrls {
|
||||
for fallbackUrl in fallbackUrls {
|
||||
if let data = await fetchWebsiteData(urlString: fallbackUrl + replacedSearchUrl, sourceName: sourceName) {
|
||||
let fetchUrl = fallbackUrl + (replacedSearchUrl.map { $0 } ?? "")
|
||||
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
|
||||
return data
|
||||
}
|
||||
}
|
||||
|
|
@ -927,10 +930,17 @@ class ScrapingViewModel: ObservableObject {
|
|||
}
|
||||
|
||||
func runRegex(parsedValue: String, regexString: String) -> String? {
|
||||
// TODO: Maybe dynamically parse flags
|
||||
let replacedRegexString = regexString
|
||||
.replacingOccurrences(of: "{query}", with: searchText)
|
||||
.replacingOccurrences(of: "{query}", with: searchText.lowercased())
|
||||
|
||||
guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else {
|
||||
guard
|
||||
let matchedRegex = try? Regex(
|
||||
replacedRegexString,
|
||||
options: [.caseInsensitive, .anchorsMatchLines]
|
||||
)
|
||||
.firstMatch(in: parsedValue)
|
||||
else {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue