mirror of
https://github.com/kodjodevf/mangayomi.git
synced 2026-04-21 16:01:58 +00:00
fix novel download and improve HTML content processing
This commit is contained in:
parent
dee1a8635c
commit
93298c7376
3 changed files with 105 additions and 38 deletions
|
|
@ -12,7 +12,11 @@ class PageUrl {
|
||||||
headers: (json['headers'] as Map?)?.toMapStringString,
|
headers: (json['headers'] as Map?)?.toMapStringString,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Map<String, dynamic> toJson() => {'url': url, 'headers': headers};
|
Map<String, dynamic> toJson() => {
|
||||||
|
'url': url,
|
||||||
|
'headers': headers,
|
||||||
|
'fileName': fileName,
|
||||||
|
};
|
||||||
|
|
||||||
@override
|
@override
|
||||||
String toString() {
|
String toString() {
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import 'dart:ui';
|
||||||
import 'package:connectivity_plus/connectivity_plus.dart';
|
import 'package:connectivity_plus/connectivity_plus.dart';
|
||||||
import 'package:flutter/widgets.dart';
|
import 'package:flutter/widgets.dart';
|
||||||
import 'package:isar_community/isar.dart';
|
import 'package:isar_community/isar.dart';
|
||||||
|
import 'package:mangayomi/eval/lib.dart';
|
||||||
import 'package:mangayomi/eval/model/m_bridge.dart';
|
import 'package:mangayomi/eval/model/m_bridge.dart';
|
||||||
import 'package:mangayomi/models/manga.dart';
|
import 'package:mangayomi/models/manga.dart';
|
||||||
import 'package:mangayomi/models/page.dart';
|
import 'package:mangayomi/models/page.dart';
|
||||||
|
|
@ -13,6 +14,7 @@ import 'package:mangayomi/models/download.dart';
|
||||||
import 'package:mangayomi/models/settings.dart';
|
import 'package:mangayomi/models/settings.dart';
|
||||||
import 'package:mangayomi/models/video.dart';
|
import 'package:mangayomi/models/video.dart';
|
||||||
import 'package:mangayomi/modules/manga/download/providers/convert_to_cbz.dart';
|
import 'package:mangayomi/modules/manga/download/providers/convert_to_cbz.dart';
|
||||||
|
import 'package:mangayomi/modules/more/settings/browse/providers/browse_state_provider.dart';
|
||||||
import 'package:mangayomi/modules/more/settings/downloads/providers/downloads_state_provider.dart';
|
import 'package:mangayomi/modules/more/settings/downloads/providers/downloads_state_provider.dart';
|
||||||
import 'package:mangayomi/providers/l10n_providers.dart';
|
import 'package:mangayomi/providers/l10n_providers.dart';
|
||||||
import 'package:mangayomi/providers/storage_provider.dart';
|
import 'package:mangayomi/providers/storage_provider.dart';
|
||||||
|
|
@ -27,6 +29,7 @@ import 'package:mangayomi/utils/extensions/chapter.dart';
|
||||||
import 'package:mangayomi/utils/extensions/string_extensions.dart';
|
import 'package:mangayomi/utils/extensions/string_extensions.dart';
|
||||||
import 'package:mangayomi/utils/headers.dart';
|
import 'package:mangayomi/utils/headers.dart';
|
||||||
import 'package:mangayomi/utils/reg_exp_matcher.dart';
|
import 'package:mangayomi/utils/reg_exp_matcher.dart';
|
||||||
|
import 'package:mangayomi/utils/utils.dart';
|
||||||
import 'package:path/path.dart' as p;
|
import 'package:path/path.dart' as p;
|
||||||
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
||||||
part 'download_provider.g.dart';
|
part 'download_provider.g.dart';
|
||||||
|
|
@ -73,6 +76,7 @@ Future<void> downloadChapter(
|
||||||
);
|
);
|
||||||
|
|
||||||
List<PageUrl> pageUrls = [];
|
List<PageUrl> pageUrls = [];
|
||||||
|
PageUrl? novelPage;
|
||||||
List<PageUrl> pages = [];
|
List<PageUrl> pages = [];
|
||||||
final StorageProvider storageProvider = StorageProvider();
|
final StorageProvider storageProvider = StorageProvider();
|
||||||
await storageProvider.requestPermission();
|
await storageProvider.requestPermission();
|
||||||
|
|
@ -225,18 +229,21 @@ Future<void> downloadChapter(
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else if (itemType == ItemType.novel && chapter.url != null) {
|
} else if (itemType == ItemType.novel && chapter.url != null) {
|
||||||
final cookie = MClient.getCookiesPref(chapter.url!);
|
final manga = chapter.manga.value!;
|
||||||
|
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
|
||||||
|
final chapterUrl = "${source.baseUrl}${chapter.url!.getUrlWithoutDomain}";
|
||||||
|
final cookie = MClient.getCookiesPref(chapterUrl);
|
||||||
final headers = htmlHeader;
|
final headers = htmlHeader;
|
||||||
if (cookie.isNotEmpty) {
|
if (cookie.isNotEmpty) {
|
||||||
final userAgent = isar.settings.getSync(227)!.userAgent!;
|
final userAgent = isar.settings.getSync(227)!.userAgent!;
|
||||||
headers.addAll(cookie);
|
headers.addAll(cookie);
|
||||||
headers[HttpHeaders.userAgentHeader] = userAgent;
|
headers[HttpHeaders.userAgentHeader] = userAgent;
|
||||||
}
|
}
|
||||||
final res = await http.get(Uri.parse(chapter.url!), headers: headers);
|
final res = await http.get(Uri.parse(chapterUrl), headers: headers);
|
||||||
if (res.headers.containsKey("Location")) {
|
if (res.headers.containsKey("Location")) {
|
||||||
pageUrls = [PageUrl(res.headers["Location"]!)];
|
novelPage = PageUrl(res.headers["Location"]!);
|
||||||
} else {
|
} else {
|
||||||
pageUrls = [PageUrl(chapter.url!)];
|
novelPage = PageUrl(chapterUrl);
|
||||||
}
|
}
|
||||||
isOk = true;
|
isOk = true;
|
||||||
}
|
}
|
||||||
|
|
@ -324,19 +331,6 @@ Future<void> downloadChapter(
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
final file = File(
|
|
||||||
p.join(chapterDirectory.path, "$chapterName.html"),
|
|
||||||
);
|
|
||||||
if (!file.existsSync()) {
|
|
||||||
pages.add(
|
|
||||||
PageUrl(
|
|
||||||
page.url.trim().trimLeft().trimRight(),
|
|
||||||
headers: pageHeaders,
|
|
||||||
fileName: p.join(chapterDirectory.path, "$chapterName.html"),
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -344,18 +338,7 @@ Future<void> downloadChapter(
|
||||||
if (pages.isEmpty && pageUrls.isNotEmpty) {
|
if (pages.isEmpty && pageUrls.isNotEmpty) {
|
||||||
await processConvert();
|
await processConvert();
|
||||||
savePageUrls();
|
savePageUrls();
|
||||||
final download = Download(
|
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
|
||||||
id: chapter.id,
|
|
||||||
succeeded: 0,
|
|
||||||
failed: 0,
|
|
||||||
total: 0,
|
|
||||||
isDownload: true,
|
|
||||||
isStartDownload: false,
|
|
||||||
);
|
|
||||||
|
|
||||||
isar.writeTxnSync(() {
|
|
||||||
isar.downloads.putSync(download..chapter.value = chapter);
|
|
||||||
});
|
|
||||||
} else {
|
} else {
|
||||||
savePageUrls();
|
savePageUrls();
|
||||||
await MDownloader(
|
await MDownloader(
|
||||||
|
|
@ -367,6 +350,24 @@ Future<void> downloadChapter(
|
||||||
setProgress(progress);
|
setProgress(progress);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
} else if (itemType == ItemType.novel) {
|
||||||
|
final file = File(p.join(chapterDirectory.path, "$chapterName.html"));
|
||||||
|
if (!file.existsSync() && novelPage != null) {
|
||||||
|
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
|
||||||
|
p.join(chapterDirectory.path, "$chapterName.html");
|
||||||
|
final html = await getExtensionService(
|
||||||
|
source,
|
||||||
|
ref.read(androidProxyServerStateProvider),
|
||||||
|
).getHtmlContent(chapter.manga.value!.name!, chapter.url!);
|
||||||
|
if (html.isNotEmpty) {
|
||||||
|
await file.writeAsString(html);
|
||||||
|
await setProgress(
|
||||||
|
DownloadProgress(1, 1, itemType, isCompleted: true),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
|
||||||
|
}
|
||||||
} else if (hasM3U8File) {
|
} else if (hasM3U8File) {
|
||||||
await m3u8Downloader?.download((progress) {
|
await m3u8Downloader?.download((progress) {
|
||||||
setProgress(progress);
|
setProgress(progress);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import 'dart:io';
|
import 'dart:io';
|
||||||
|
import 'package:path/path.dart' as p;
|
||||||
import 'package:epubx/epubx.dart';
|
import 'package:epubx/epubx.dart';
|
||||||
import 'package:html/parser.dart';
|
import 'package:html/parser.dart';
|
||||||
import 'package:mangayomi/eval/lib.dart';
|
import 'package:mangayomi/eval/lib.dart';
|
||||||
|
|
@ -36,15 +36,20 @@ Future<(String, EpubBook?)> getHtmlContent(
|
||||||
result = (_buildHtml("Local epub file not found!"), null);
|
result = (_buildHtml("Local epub file not found!"), null);
|
||||||
}
|
}
|
||||||
final storageProvider = StorageProvider();
|
final storageProvider = StorageProvider();
|
||||||
final mangaDirectory = await storageProvider.getMangaMainDirectory(chapter);
|
final mangaMainDirectory = await storageProvider.getMangaMainDirectory(
|
||||||
final htmlPath = "${mangaDirectory!.path}${chapter.name}.html";
|
chapter,
|
||||||
|
);
|
||||||
|
final chapterDirectory = (await storageProvider.getMangaChapterDirectory(
|
||||||
|
chapter,
|
||||||
|
mangaMainDirectory: mangaMainDirectory,
|
||||||
|
))!;
|
||||||
|
|
||||||
|
final htmlPath = p.join(chapterDirectory.path, "${chapter.name}.html");
|
||||||
|
|
||||||
final htmlFile = File(htmlPath);
|
final htmlFile = File(htmlPath);
|
||||||
String? htmlContent;
|
String? htmlContent;
|
||||||
if (await htmlFile.exists()) {
|
if (await htmlFile.exists()) {
|
||||||
htmlContent = await htmlFile.readAsString();
|
htmlContent = await htmlFile.readAsString();
|
||||||
final temp = parse(htmlContent);
|
|
||||||
temp.getElementsByTagName("script").forEach((el) => el.remove());
|
|
||||||
htmlContent = temp.outerHtml;
|
|
||||||
}
|
}
|
||||||
final source = getSource(
|
final source = getSource(
|
||||||
chapter.manga.value!.lang!,
|
chapter.manga.value!.lang!,
|
||||||
|
|
@ -74,8 +79,65 @@ Future<(String, EpubBook?)> getHtmlContent(
|
||||||
}
|
}
|
||||||
|
|
||||||
String _buildHtml(String input) {
|
String _buildHtml(String input) {
|
||||||
return '''<div id="readerViewContent"><div style="padding: 2em;">$input</div></div>'''
|
// Decode basic escapes
|
||||||
|
String cleaned = input
|
||||||
.replaceAll("\\n", "")
|
.replaceAll("\\n", "")
|
||||||
.replaceAll("\\t", "")
|
.replaceAll("\\t", "")
|
||||||
.replaceAll("\\\"", "\"");
|
.replaceAll("\\\"", "\"")
|
||||||
|
.replaceAll("\\'", "'")
|
||||||
|
.replaceAll("\\&quot;", "\"")
|
||||||
|
.replaceAll("&quot;", "\"");
|
||||||
|
|
||||||
|
// Parse HTML to clean it
|
||||||
|
final document = parse(cleaned);
|
||||||
|
|
||||||
|
// Remove unwanted elements
|
||||||
|
document.querySelectorAll('iframe').forEach((el) => el.remove());
|
||||||
|
document.querySelectorAll('script').forEach((el) => el.remove());
|
||||||
|
document.querySelectorAll('[data-aa]').forEach((el) => el.remove());
|
||||||
|
|
||||||
|
// Get cleaned HTML
|
||||||
|
String htmlContent = document.body?.innerHtml ?? cleaned;
|
||||||
|
|
||||||
|
// Decode HTML entities while keeping HTML tags
|
||||||
|
htmlContent = _decodeHtmlEntities(htmlContent);
|
||||||
|
|
||||||
|
return '''<div id="readerViewContent"><div style="padding: 2em;">$htmlContent</div></div>''';
|
||||||
|
}
|
||||||
|
|
||||||
|
String _decodeHtmlEntities(String html) {
|
||||||
|
// Decode numeric HTML entities (&#8220;, &#8217;, etc.)
|
||||||
|
String decoded = html.replaceAllMapped(RegExp(r'&#(\d+);'), (match) {
|
||||||
|
final charCode = int.tryParse(match.group(1)!);
|
||||||
|
return charCode != null ? String.fromCharCode(charCode) : match.group(0)!;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Decode hexadecimal HTML entities (&#x2019;, etc.)
|
||||||
|
decoded = decoded.replaceAllMapped(RegExp(r'&#x([0-9a-fA-F]+);'), (match) {
|
||||||
|
final charCode = int.tryParse(match.group(1)!, radix: 16);
|
||||||
|
return charCode != null ? String.fromCharCode(charCode) : match.group(0)!;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Decode common named HTML entities
|
||||||
|
final entities = {
|
||||||
|
'&amp;': '&',
|
||||||
|
'&lt;': '<',
|
||||||
|
'&gt;': '>',
|
||||||
|
'&nbsp;': ' ',
|
||||||
|
'&quot;': '"',
|
||||||
|
'&apos;': "'",
|
||||||
|
'&ldquo;': '“',
|
||||||
|
'&rdquo;': '”',
|
||||||
|
'&lsquo;': '‘',
|
||||||
|
'&rsquo;': '’',
|
||||||
|
'&mdash;': '—',
|
||||||
|
'&ndash;': '–',
|
||||||
|
'&hellip;': '…',
|
||||||
|
};
|
||||||
|
|
||||||
|
entities.forEach((entity, replacement) {
|
||||||
|
decoded = decoded.replaceAll(entity, replacement);
|
||||||
|
});
|
||||||
|
|
||||||
|
return decoded;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue