mirror of
https://github.com/Ferrite-iOS/Ferrite.git
synced 2026-05-11 12:40:34 +00:00
Sources: Don't require searchUrl in HTML parser
searchUrl used to be a required variable in HTML parsers, but some HTML sources are single-page, which means that a search URL isn't required. Also make regex matching case-insensitive and make the `^`/`$` anchors match at line boundaries. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
parent
22bec5da52
commit
51366f3215
4 changed files with 21 additions and 11 deletions
|
|
@ -15,7 +15,7 @@ public extension SourceHtmlParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
@NSManaged var rows: String
|
@NSManaged var rows: String
|
||||||
@NSManaged var searchUrl: String
|
@NSManaged var searchUrl: String?
|
||||||
@NSManaged var magnetHash: SourceMagnetHash?
|
@NSManaged var magnetHash: SourceMagnetHash?
|
||||||
@NSManaged var magnetLink: SourceMagnetLink?
|
@NSManaged var magnetLink: SourceMagnetLink?
|
||||||
@NSManaged var parentSource: Source?
|
@NSManaged var parentSource: Source?
|
||||||
|
|
|
||||||
|
|
@ -99,7 +99,7 @@
|
||||||
</entity>
|
</entity>
|
||||||
<entity name="SourceHtmlParser" representedClassName="SourceHtmlParser" syncable="YES">
|
<entity name="SourceHtmlParser" representedClassName="SourceHtmlParser" syncable="YES">
|
||||||
<attribute name="rows" attributeType="String" defaultValueString=""/>
|
<attribute name="rows" attributeType="String" defaultValueString=""/>
|
||||||
<attribute name="searchUrl" attributeType="String" defaultValueString=""/>
|
<attribute name="searchUrl" optional="YES" attributeType="String"/>
|
||||||
<relationship name="magnetHash" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetHash" inverseName="parentHtmlParser" inverseEntity="SourceMagnetHash"/>
|
<relationship name="magnetHash" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetHash" inverseName="parentHtmlParser" inverseEntity="SourceMagnetHash"/>
|
||||||
<relationship name="magnetLink" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetLink" inverseName="parentHtmlParser" inverseEntity="SourceMagnetLink"/>
|
<relationship name="magnetLink" optional="YES" maxCount="1" deletionRule="Cascade" destinationEntity="SourceMagnetLink" inverseName="parentHtmlParser" inverseEntity="SourceMagnetLink"/>
|
||||||
<relationship name="parentSource" optional="YES" maxCount="1" deletionRule="Nullify" destinationEntity="Source" inverseName="htmlParser" inverseEntity="Source"/>
|
<relationship name="parentSource" optional="YES" maxCount="1" deletionRule="Nullify" destinationEntity="Source" inverseName="htmlParser" inverseEntity="Source"/>
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public struct SourceHtmlParserJson: Codable, Hashable, Sendable {
|
public struct SourceHtmlParserJson: Codable, Hashable, Sendable {
|
||||||
let searchUrl: String
|
let searchUrl: String?
|
||||||
let rows: String
|
let rows: String
|
||||||
let title: SourceComplexQueryJson
|
let title: SourceComplexQueryJson
|
||||||
let magnet: SourceMagnetJson
|
let magnet: SourceMagnetJson
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@ class ScrapingViewModel: ObservableObject {
|
||||||
// Default to HTML scraping
|
// Default to HTML scraping
|
||||||
let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? .none
|
let preferredParser = SourcePreferredParser(rawValue: source.preferredParser) ?? .none
|
||||||
|
|
||||||
guard let encodedQuery = searchText.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
|
guard let encodedQuery = searchText.lowercased().addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
|
||||||
await sendSourceError("\(source.name): Could not process search query, invalid characters present.")
|
await sendSourceError("\(source.name): Could not process search query, invalid characters present.")
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
@ -162,8 +162,9 @@ class ScrapingViewModel: ObservableObject {
|
||||||
switch preferredParser {
|
switch preferredParser {
|
||||||
case .scraping:
|
case .scraping:
|
||||||
if let htmlParser = source.htmlParser {
|
if let htmlParser = source.htmlParser {
|
||||||
let replacedSearchUrl = htmlParser.searchUrl
|
let replacedSearchUrl = htmlParser.searchUrl.map {
|
||||||
.replacingOccurrences(of: "{query}", with: encodedQuery)
|
$0.replacingOccurrences(of: "{query}", with: encodedQuery)
|
||||||
|
}
|
||||||
|
|
||||||
let data = await handleUrls(
|
let data = await handleUrls(
|
||||||
baseUrl: baseUrl,
|
baseUrl: baseUrl,
|
||||||
|
|
@ -260,14 +261,16 @@ class ScrapingViewModel: ObservableObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks the base URL for any website data then iterates through the fallback URLs
|
// Checks the base URL for any website data then iterates through the fallback URLs
|
||||||
func handleUrls(baseUrl: String, replacedSearchUrl: String, fallbackUrls: [String]?, sourceName: String) async -> Data? {
|
func handleUrls(baseUrl: String, replacedSearchUrl: String?, fallbackUrls: [String]?, sourceName: String) async -> Data? {
|
||||||
if let data = await fetchWebsiteData(urlString: baseUrl + replacedSearchUrl, sourceName: sourceName) {
|
let fetchUrl = baseUrl + (replacedSearchUrl.map { $0 } ?? "")
|
||||||
|
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
if let fallbackUrls {
|
if let fallbackUrls {
|
||||||
for fallbackUrl in fallbackUrls {
|
for fallbackUrl in fallbackUrls {
|
||||||
if let data = await fetchWebsiteData(urlString: fallbackUrl + replacedSearchUrl, sourceName: sourceName) {
|
let fetchUrl = fallbackUrl + (replacedSearchUrl.map { $0 } ?? "")
|
||||||
|
if let data = await fetchWebsiteData(urlString: fetchUrl, sourceName: sourceName) {
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -927,10 +930,17 @@ class ScrapingViewModel: ObservableObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
func runRegex(parsedValue: String, regexString: String) -> String? {
|
func runRegex(parsedValue: String, regexString: String) -> String? {
|
||||||
|
// TODO: Maybe dynamically parse flags
|
||||||
let replacedRegexString = regexString
|
let replacedRegexString = regexString
|
||||||
.replacingOccurrences(of: "{query}", with: searchText)
|
.replacingOccurrences(of: "{query}", with: searchText.lowercased())
|
||||||
|
|
||||||
guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else {
|
guard
|
||||||
|
let matchedRegex = try? Regex(
|
||||||
|
replacedRegexString,
|
||||||
|
options: [.caseInsensitive, .anchorsMatchLines]
|
||||||
|
)
|
||||||
|
.firstMatch(in: parsedValue)
|
||||||
|
else {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue