Sources: Improve regex and require title

Titles are now required, since an entry without a title shouldn't be
featured. Regex support is now added for matching, along with
splicing strings via capture groups.

If the regex doesn't contain a capture group, assume that a contains
check is occurring and keep the original value when the regex matches.

Also move searchText back into scrapingModel.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2023-03-31 22:47:13 -04:00
parent 2982c971a8
commit 2cf6e46422
5 changed files with 118 additions and 94 deletions

View file

@ -63,10 +63,10 @@ public struct SourceJsonParserJson: Codable, Hashable, Sendable {
let searchUrl: String let searchUrl: String
let results: String? let results: String?
let subResults: String? let subResults: String?
let title: SourceComplexQueryJson
let magnetHash: SourceComplexQueryJson? let magnetHash: SourceComplexQueryJson?
let magnetLink: SourceComplexQueryJson? let magnetLink: SourceComplexQueryJson?
let subName: SourceComplexQueryJson? let subName: SourceComplexQueryJson?
let title: SourceComplexQueryJson?
let size: SourceComplexQueryJson? let size: SourceComplexQueryJson?
let sl: SourceSLJson? let sl: SourceSLJson?
} }
@ -75,10 +75,10 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable {
let rssUrl: String? let rssUrl: String?
let searchUrl: String let searchUrl: String
let items: String let items: String
let title: SourceComplexQueryJson
let magnetHash: SourceComplexQueryJson? let magnetHash: SourceComplexQueryJson?
let magnetLink: SourceComplexQueryJson? let magnetLink: SourceComplexQueryJson?
let subName: SourceComplexQueryJson? let subName: SourceComplexQueryJson?
let title: SourceComplexQueryJson?
let size: SourceComplexQueryJson? let size: SourceComplexQueryJson?
let sl: SourceSLJson? let sl: SourceSLJson?
} }
@ -86,9 +86,9 @@ public struct SourceRssParserJson: Codable, Hashable, Sendable {
public struct SourceHtmlParserJson: Codable, Hashable, Sendable { public struct SourceHtmlParserJson: Codable, Hashable, Sendable {
let searchUrl: String let searchUrl: String
let rows: String let rows: String
let title: SourceComplexQueryJson
let magnet: SourceMagnetJson let magnet: SourceMagnetJson
let subName: SourceComplexQueryJson? let subName: SourceComplexQueryJson?
let title: SourceComplexQueryJson?
let size: SourceComplexQueryJson? let size: SourceComplexQueryJson?
let sl: SourceSLJson? let sl: SourceSLJson?
} }

View file

@ -584,14 +584,12 @@ public class PluginManager: ObservableObject {
newSourceJsonParser.subName = newSourceSubName newSourceJsonParser.subName = newSourceSubName
} }
if let titleJson = jsonParserJson.title { let newSourceTitle = SourceTitle(context: backgroundContext)
let newSourceTitle = SourceTitle(context: backgroundContext) newSourceTitle.query = jsonParserJson.title.query
newSourceTitle.query = titleJson.query newSourceTitle.attribute = jsonParserJson.title.attribute ?? "text"
newSourceTitle.attribute = titleJson.attribute ?? "text" newSourceTitle.discriminator = jsonParserJson.title.discriminator
newSourceTitle.discriminator = titleJson.discriminator
newSourceJsonParser.title = newSourceTitle newSourceJsonParser.title = newSourceTitle
}
if let sizeJson = jsonParserJson.size { if let sizeJson = jsonParserJson.size {
let newSourceSize = SourceSize(context: backgroundContext) let newSourceSize = SourceSize(context: backgroundContext)
@ -656,15 +654,13 @@ public class PluginManager: ObservableObject {
newSourceRssParser.subName = newSourceSubName newSourceRssParser.subName = newSourceSubName
} }
if let titleJson = rssParserJson.title { let newSourceTitle = SourceTitle(context: backgroundContext)
let newSourceTitle = SourceTitle(context: backgroundContext) newSourceTitle.query = rssParserJson.title.query
newSourceTitle.query = titleJson.query newSourceTitle.attribute = rssParserJson.title.attribute ?? "text"
newSourceTitle.attribute = titleJson.attribute ?? "text" newSourceTitle.discriminator = rssParserJson.title.discriminator
newSourceTitle.discriminator = titleJson.discriminator newSourceTitle.regex = rssParserJson.title.regex
newSourceTitle.regex = titleJson.regex
newSourceRssParser.title = newSourceTitle newSourceRssParser.title = newSourceTitle
}
if let sizeJson = rssParserJson.size { if let sizeJson = rssParserJson.size {
let newSourceSize = SourceSize(context: backgroundContext) let newSourceSize = SourceSize(context: backgroundContext)
@ -708,15 +704,13 @@ public class PluginManager: ObservableObject {
newSourceHtmlParser.subName = newSourceSubName newSourceHtmlParser.subName = newSourceSubName
} }
// Adds a title complex query if present // Adds a title complex query
if let titleJson = htmlParserJson.title { let newSourceTitle = SourceTitle(context: backgroundContext)
let newSourceTitle = SourceTitle(context: backgroundContext) newSourceTitle.query = htmlParserJson.title.query
newSourceTitle.query = titleJson.query newSourceTitle.attribute = htmlParserJson.title.attribute ?? "text"
newSourceTitle.attribute = titleJson.attribute ?? "text" newSourceTitle.regex = htmlParserJson.title.regex
newSourceTitle.regex = titleJson.regex
newSourceHtmlParser.title = newSourceTitle newSourceHtmlParser.title = newSourceTitle
}
// Adds a size complex query if present // Adds a size complex query if present
if let sizeJson = htmlParserJson.size { if let sizeJson = htmlParserJson.size {

View file

@ -22,6 +22,7 @@ class ScrapingViewModel: ObservableObject {
runningSearchTask = nil runningSearchTask = nil
} }
@Published var searchText: String = ""
@Published var searchResults: [SearchResult] = [] @Published var searchResults: [SearchResult] = []
// Only add results with valid magnet hashes to the search results array // Only add results with valid magnet hashes to the search results array
@ -67,7 +68,7 @@ class ScrapingViewModel: ObservableObject {
await logManager?.error(description, showToast: false) await logManager?.error(description, showToast: false)
} }
public func scanSources(sources: [Source], searchText: String, debridManager: DebridManager) async { public func scanSources(sources: [Source], debridManager: DebridManager) async {
await logManager?.info("Started scanning sources for query \"\(searchText)\"") await logManager?.info("Started scanning sources for query \"\(searchText)\"")
if sources.isEmpty { if sources.isEmpty {
@ -101,7 +102,7 @@ class ScrapingViewModel: ObservableObject {
if source.enabled { if source.enabled {
group.addTask { group.addTask {
await self.updateCurrentSourceNames(source.name) await self.updateCurrentSourceNames(source.name)
let requestResult = await self.executeParser(source: source, searchText: searchText) let requestResult = await self.executeParser(source: source)
return (requestResult, source.name) return (requestResult, source.name)
} }
@ -142,7 +143,7 @@ class ScrapingViewModel: ObservableObject {
} }
} }
func executeParser(source: Source, searchText: String) async -> SearchRequestResult? { func executeParser(source: Source) async -> SearchRequestResult? {
guard let baseUrl = source.baseUrl else { guard let baseUrl = source.baseUrl else {
await logManager?.error("Scraping: The base URL could not be found for source \(source.name)") await logManager?.error("Scraping: The base URL could not be found for source \(source.name)")
@ -364,7 +365,7 @@ class ScrapingViewModel: ObservableObject {
// Fetches the data for a URL // Fetches the data for a URL
public func fetchWebsiteData(urlString: String, sourceName: String) async -> Data? { public func fetchWebsiteData(urlString: String, sourceName: String) async -> Data? {
guard let url = URL(string: urlString) else { guard let url = URL(string: urlString.trimmingCharacters(in: .whitespacesAndNewlines)) else {
await sendSourceError("\(sourceName): Source doesn't contain a valid URL, contact the source dev!") await sendSourceError("\(sourceName): Source doesn't contain a valid URL, contact the source dev!")
return nil return nil
@ -467,11 +468,36 @@ class ScrapingViewModel: ObservableObject {
return SearchRequestResult(results: tempResults, magnets: magnets) return SearchRequestResult(results: tempResults, magnets: magnets)
} }
// TODO: Add regex parsing for API
public func parseJsonResult(_ result: JSON, public func parseJsonResult(_ result: JSON,
jsonParser: SourceJsonParser, jsonParser: SourceJsonParser,
source: Source, source: Source,
existingSearchResult: SearchResult? = nil) -> SearchResult? existingSearchResult: SearchResult? = nil) -> SearchResult?
{ {
// Enforce these parsers
guard let titleParser = jsonParser.title else {
return nil
}
var title: String? = existingSearchResult?.title
if let existingTitle = title,
let discriminatorQuery = titleParser.discriminator
{
let rawDiscriminator = result[discriminatorQuery.components(separatedBy: ".")].rawValue
if !(rawDiscriminator is NSNull) {
title = String(describing: rawDiscriminator) + existingTitle
}
} else if title == nil {
let rawTitle = result[titleParser.query].rawValue
title = rawTitle is NSNull ? nil : String(describing: rawTitle)
}
// Return if a title doesn't exist
if title == nil {
return nil
}
var magnetHash: String? = existingSearchResult?.magnet.hash var magnetHash: String? = existingSearchResult?.magnet.hash
if let magnetHashParser = jsonParser.magnetHash { if let magnetHashParser = jsonParser.magnetHash {
let rawHash = result[magnetHashParser.query.components(separatedBy: ".")].rawValue let rawHash = result[magnetHashParser.query.components(separatedBy: ".")].rawValue
@ -487,23 +513,7 @@ class ScrapingViewModel: ObservableObject {
link = rawLink is NSNull ? nil : String(describing: rawLink) link = rawLink is NSNull ? nil : String(describing: rawLink)
} }
var title: String? = existingSearchResult?.title // Return if a magnet hash doesn't exist
if let titleParser = jsonParser.title {
if let existingTitle = existingSearchResult?.title,
let discriminatorQuery = titleParser.discriminator
{
let rawDiscriminator = result[discriminatorQuery.components(separatedBy: ".")].rawValue
if !(rawDiscriminator is NSNull) {
title = String(describing: rawDiscriminator) + existingTitle
}
} else if existingSearchResult?.title == nil {
let rawTitle = result[titleParser.query].rawValue
title = rawTitle is NSNull ? nil : String(describing: rawTitle)
}
}
// Return if no magnet hash exists
let magnet = Magnet(hash: magnetHash, link: link, title: title, trackers: source.trackers) let magnet = Magnet(hash: magnetHash, link: link, title: title, trackers: source.trackers)
if magnet.hash == nil { if magnet.hash == nil {
return nil return nil
@ -573,6 +583,21 @@ class ScrapingViewModel: ObservableObject {
var magnets: [Magnet] = [] var magnets: [Magnet] = []
for item in items { for item in items {
// Enforce these parsers
guard let titleParser = rssParser.title else {
continue
}
guard let title = try? runRssComplexQuery(
item: item,
query: titleParser.query,
attribute: titleParser.attribute,
discriminator: titleParser.discriminator,
regexString: titleParser.regex
) else {
continue
}
// Parse magnet link or translate hash // Parse magnet link or translate hash
var magnetHash: String? var magnetHash: String?
if let magnetHashParser = rssParser.magnetHash { if let magnetHashParser = rssParser.magnetHash {
@ -596,17 +621,6 @@ class ScrapingViewModel: ObservableObject {
) )
} }
var title: String?
if let titleParser = rssParser.title {
title = try? runRssComplexQuery(
item: item,
query: titleParser.query,
attribute: titleParser.attribute,
discriminator: titleParser.discriminator,
regexString: titleParser.regex
)
}
// Fetches the subName for the source if there is one // Fetches the subName for the source if there is one
var subName: String? var subName: String?
if let subNameParser = rssParser.subName { if let subNameParser = rssParser.subName {
@ -666,7 +680,7 @@ class ScrapingViewModel: ObservableObject {
} }
let result = SearchResult( let result = SearchResult(
title: title ?? "No title", title: title,
source: subName.map { "\(source.name) - \($0)" } ?? source.name, source: subName.map { "\(source.name) - \($0)" } ?? source.name,
size: size ?? "", size: size ?? "",
magnet: magnet, magnet: magnet,
@ -708,10 +722,8 @@ class ScrapingViewModel: ObservableObject {
// A capture group must be used in the provided regex // A capture group must be used in the provided regex
if let regexString, if let regexString,
let parsedValue, let parsedValue {
let regexValue = try? Regex(regexString).firstMatch(in: parsedValue)?.groups[safe: 0]?.value return runRegex(parsedValue: parsedValue, regexString: regexString)
{
return regexValue
} else { } else {
return parsedValue return parsedValue
} }
@ -740,18 +752,37 @@ class ScrapingViewModel: ObservableObject {
// If there's an error, continue instead of returning with nothing // If there's an error, continue instead of returning with nothing
for row in rows { for row in rows {
do { do {
// Fetches the magnet link // Enforce these parsers
// If the magnet is located on an external page, fetch the external page and grab the magnet link guard
// External page fetching affects source performance let magnetParser = htmlParser.magnetLink,
guard let magnetParser = htmlParser.magnetLink else { let titleParser = htmlParser.title
else {
continue continue
} }
// Fetches the episode/movie title
// Place here for filtering purposes
guard let title = try? runHtmlComplexQuery(
row: row,
query: titleParser.query,
attribute: titleParser.attribute,
regexString: titleParser.regex
) else {
continue
}
// Fetches the magnet link
// If the magnet is located on an external page, fetch the external page and grab the magnet link
// External page fetching affects source performance
var href: String var href: String
if let externalMagnetQuery = magnetParser.externalLinkQuery, !externalMagnetQuery.isEmpty { if let externalMagnetQuery = magnetParser.externalLinkQuery, !externalMagnetQuery.isEmpty {
guard let externalMagnetUrl = try row.select(externalMagnetQuery).first()?.attr("href") else {
continue
}
let replacedMagnetUrl = externalMagnetUrl.starts(with: "/") ? baseUrl + externalMagnetUrl : externalMagnetUrl
guard guard
let externalMagnetLink = try row.select(externalMagnetQuery).first()?.attr("href"), let data = await fetchWebsiteData(urlString: replacedMagnetUrl, sourceName: source.name),
let data = await fetchWebsiteData(urlString: baseUrl + externalMagnetLink, sourceName: source.name),
let magnetHtml = String(data: data, encoding: .utf8) let magnetHtml = String(data: data, encoding: .utf8)
else { else {
continue continue
@ -786,17 +817,6 @@ class ScrapingViewModel: ObservableObject {
continue continue
} }
// Fetches the episode/movie title
var title: String?
if let titleParser = htmlParser.title {
title = try? runHtmlComplexQuery(
row: row,
query: titleParser.query,
attribute: titleParser.attribute,
regexString: titleParser.regex
)
}
var subName: String? var subName: String?
if let subNameParser = htmlParser.subName { if let subNameParser = htmlParser.subName {
subName = try? runHtmlComplexQuery( subName = try? runHtmlComplexQuery(
@ -859,7 +879,7 @@ class ScrapingViewModel: ObservableObject {
} }
let result = SearchResult( let result = SearchResult(
title: title ?? "No title", title: title,
source: subName.map { "\(source.name) - \($0)" } ?? source.name, source: subName.map { "\(source.name) - \($0)" } ?? source.name,
size: size ?? "", size: size ?? "",
magnet: magnet, magnet: magnet,
@ -898,12 +918,25 @@ class ScrapingViewModel: ObservableObject {
parsedValue = try result?.attr(attribute) parsedValue = try result?.attr(attribute)
} }
// A capture group must be used in the provided regex if let parsedValue,
if let regexString, let regexString {
let parsedValue, return runRegex(parsedValue: parsedValue, regexString: regexString)
let regexValue = try? Regex(regexString).firstMatch(in: parsedValue)?.groups[safe: 0]?.value } else {
{ return parsedValue
return regexValue }
}
func runRegex(parsedValue: String, regexString: String) -> String? {
let replacedRegexString = regexString
.replacingOccurrences(of: "{query}", with: searchText)
guard let matchedRegex = try? Regex(replacedRegexString).firstMatch(in: parsedValue) else {
return nil
}
// Is there a capture group present? Otherwise return the original matched string
if let group = matchedRegex.groups[safe: 0] {
return group.value
} else { } else {
return parsedValue return parsedValue
} }

View file

@ -16,8 +16,6 @@ struct SearchResultsView: View {
@AppStorage("Behavior.UsesRandomSearchText") var usesRandomSearchText: Bool = false @AppStorage("Behavior.UsesRandomSearchText") var usesRandomSearchText: Bool = false
@Binding var searchText: String
@Binding var searchPrompt: String @Binding var searchPrompt: String
@State private var lastSearchPromptIndex: Int = -1 @State private var lastSearchPromptIndex: Int = -1
let searchBarTextArray: [String] = [ let searchBarTextArray: [String] = [
@ -38,7 +36,7 @@ struct SearchResultsView: View {
.onAppear { .onAppear {
searchPrompt = getSearchPrompt() searchPrompt = getSearchPrompt()
} }
.onChange(of: searchText) { newText in .onChange(of: scrapingModel.searchText) { newText in
if newText.isEmpty, isSearching { if newText.isEmpty, isSearching {
searchPrompt = getSearchPrompt() searchPrompt = getSearchPrompt()
} }

View file

@ -16,18 +16,18 @@ struct ContentView: View {
@AppStorage("Behavior.AutocorrectSearch") var autocorrectSearch: Bool = false @AppStorage("Behavior.AutocorrectSearch") var autocorrectSearch: Bool = false
@State private var searchText: String = "" // TODO: Fix searchPrompt updating
@State private var searchPrompt: String = "Search" @State private var searchPrompt: String = "Search"
var body: some View { var body: some View {
NavView { NavView {
List { List {
SearchResultsView(searchText: $searchText, searchPrompt: $searchPrompt) SearchResultsView(searchPrompt: $searchPrompt)
} }
.listStyle(.insetGrouped) .listStyle(.insetGrouped)
.inlinedList(inset: 20) .inlinedList(inset: 20)
.navigationTitle("Search") .navigationTitle("Search")
.searchable(text: $searchText, placement: .navigationBarDrawer(displayMode: .always), prompt: Text(searchPrompt)) .searchable(text: $scrapingModel.searchText, placement: .navigationBarDrawer(displayMode: .always), prompt: Text(searchPrompt))
.onSubmit(of: .search) { .onSubmit(of: .search) {
if let runningSearchTask = scrapingModel.runningSearchTask, runningSearchTask.isCancelled { if let runningSearchTask = scrapingModel.runningSearchTask, runningSearchTask.isCancelled {
scrapingModel.runningSearchTask = nil scrapingModel.runningSearchTask = nil
@ -38,7 +38,6 @@ struct ContentView: View {
let sources = pluginManager.fetchInstalledSources() let sources = pluginManager.fetchInstalledSources()
await scrapingModel.scanSources( await scrapingModel.scanSources(
sources: sources, sources: sources,
searchText: searchText,
debridManager: debridManager debridManager: debridManager
) )