mirror of
https://github.com/kodjodevf/mangayomi.git
synced 2026-01-11 22:40:36 +00:00
fix novel download and improve HTML content processing
This commit is contained in:
parent
dee1a8635c
commit
93298c7376
3 changed files with 105 additions and 38 deletions
|
|
@ -12,7 +12,11 @@ class PageUrl {
|
|||
headers: (json['headers'] as Map?)?.toMapStringString,
|
||||
);
|
||||
}
|
||||
Map<String, dynamic> toJson() => {'url': url, 'headers': headers};
|
||||
Map<String, dynamic> toJson() => {
|
||||
'url': url,
|
||||
'headers': headers,
|
||||
'fileName': fileName,
|
||||
};
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import 'dart:ui';
|
|||
import 'package:connectivity_plus/connectivity_plus.dart';
|
||||
import 'package:flutter/widgets.dart';
|
||||
import 'package:isar_community/isar.dart';
|
||||
import 'package:mangayomi/eval/lib.dart';
|
||||
import 'package:mangayomi/eval/model/m_bridge.dart';
|
||||
import 'package:mangayomi/models/manga.dart';
|
||||
import 'package:mangayomi/models/page.dart';
|
||||
|
|
@ -13,6 +14,7 @@ import 'package:mangayomi/models/download.dart';
|
|||
import 'package:mangayomi/models/settings.dart';
|
||||
import 'package:mangayomi/models/video.dart';
|
||||
import 'package:mangayomi/modules/manga/download/providers/convert_to_cbz.dart';
|
||||
import 'package:mangayomi/modules/more/settings/browse/providers/browse_state_provider.dart';
|
||||
import 'package:mangayomi/modules/more/settings/downloads/providers/downloads_state_provider.dart';
|
||||
import 'package:mangayomi/providers/l10n_providers.dart';
|
||||
import 'package:mangayomi/providers/storage_provider.dart';
|
||||
|
|
@ -27,6 +29,7 @@ import 'package:mangayomi/utils/extensions/chapter.dart';
|
|||
import 'package:mangayomi/utils/extensions/string_extensions.dart';
|
||||
import 'package:mangayomi/utils/headers.dart';
|
||||
import 'package:mangayomi/utils/reg_exp_matcher.dart';
|
||||
import 'package:mangayomi/utils/utils.dart';
|
||||
import 'package:path/path.dart' as p;
|
||||
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
||||
part 'download_provider.g.dart';
|
||||
|
|
@ -73,6 +76,7 @@ Future<void> downloadChapter(
|
|||
);
|
||||
|
||||
List<PageUrl> pageUrls = [];
|
||||
PageUrl? novelPage;
|
||||
List<PageUrl> pages = [];
|
||||
final StorageProvider storageProvider = StorageProvider();
|
||||
await storageProvider.requestPermission();
|
||||
|
|
@ -225,18 +229,21 @@ Future<void> downloadChapter(
|
|||
}
|
||||
});
|
||||
} else if (itemType == ItemType.novel && chapter.url != null) {
|
||||
final cookie = MClient.getCookiesPref(chapter.url!);
|
||||
final manga = chapter.manga.value!;
|
||||
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
|
||||
final chapterUrl = "${source.baseUrl}${chapter.url!.getUrlWithoutDomain}";
|
||||
final cookie = MClient.getCookiesPref(chapterUrl);
|
||||
final headers = htmlHeader;
|
||||
if (cookie.isNotEmpty) {
|
||||
final userAgent = isar.settings.getSync(227)!.userAgent!;
|
||||
headers.addAll(cookie);
|
||||
headers[HttpHeaders.userAgentHeader] = userAgent;
|
||||
}
|
||||
final res = await http.get(Uri.parse(chapter.url!), headers: headers);
|
||||
final res = await http.get(Uri.parse(chapterUrl), headers: headers);
|
||||
if (res.headers.containsKey("Location")) {
|
||||
pageUrls = [PageUrl(res.headers["Location"]!)];
|
||||
novelPage = PageUrl(res.headers["Location"]!);
|
||||
} else {
|
||||
pageUrls = [PageUrl(chapter.url!)];
|
||||
novelPage = PageUrl(chapterUrl);
|
||||
}
|
||||
isOk = true;
|
||||
}
|
||||
|
|
@ -324,19 +331,6 @@ Future<void> downloadChapter(
|
|||
),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
final file = File(
|
||||
p.join(chapterDirectory.path, "$chapterName.html"),
|
||||
);
|
||||
if (!file.existsSync()) {
|
||||
pages.add(
|
||||
PageUrl(
|
||||
page.url.trim().trimLeft().trimRight(),
|
||||
headers: pageHeaders,
|
||||
fileName: p.join(chapterDirectory.path, "$chapterName.html"),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -344,18 +338,7 @@ Future<void> downloadChapter(
|
|||
if (pages.isEmpty && pageUrls.isNotEmpty) {
|
||||
await processConvert();
|
||||
savePageUrls();
|
||||
final download = Download(
|
||||
id: chapter.id,
|
||||
succeeded: 0,
|
||||
failed: 0,
|
||||
total: 0,
|
||||
isDownload: true,
|
||||
isStartDownload: false,
|
||||
);
|
||||
|
||||
isar.writeTxnSync(() {
|
||||
isar.downloads.putSync(download..chapter.value = chapter);
|
||||
});
|
||||
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
|
||||
} else {
|
||||
savePageUrls();
|
||||
await MDownloader(
|
||||
|
|
@ -367,6 +350,24 @@ Future<void> downloadChapter(
|
|||
setProgress(progress);
|
||||
});
|
||||
}
|
||||
} else if (itemType == ItemType.novel) {
|
||||
final file = File(p.join(chapterDirectory.path, "$chapterName.html"));
|
||||
if (!file.existsSync() && novelPage != null) {
|
||||
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
|
||||
p.join(chapterDirectory.path, "$chapterName.html");
|
||||
final html = await getExtensionService(
|
||||
source,
|
||||
ref.read(androidProxyServerStateProvider),
|
||||
).getHtmlContent(chapter.manga.value!.name!, chapter.url!);
|
||||
if (html.isNotEmpty) {
|
||||
await file.writeAsString(html);
|
||||
await setProgress(
|
||||
DownloadProgress(1, 1, itemType, isCompleted: true),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
|
||||
}
|
||||
} else if (hasM3U8File) {
|
||||
await m3u8Downloader?.download((progress) {
|
||||
setProgress(progress);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import 'dart:io';
|
||||
|
||||
import 'package:path/path.dart' as p;
|
||||
import 'package:epubx/epubx.dart';
|
||||
import 'package:html/parser.dart';
|
||||
import 'package:mangayomi/eval/lib.dart';
|
||||
|
|
@ -36,15 +36,20 @@ Future<(String, EpubBook?)> getHtmlContent(
|
|||
result = (_buildHtml("Local epub file not found!"), null);
|
||||
}
|
||||
final storageProvider = StorageProvider();
|
||||
final mangaDirectory = await storageProvider.getMangaMainDirectory(chapter);
|
||||
final htmlPath = "${mangaDirectory!.path}${chapter.name}.html";
|
||||
final mangaMainDirectory = await storageProvider.getMangaMainDirectory(
|
||||
chapter,
|
||||
);
|
||||
final chapterDirectory = (await storageProvider.getMangaChapterDirectory(
|
||||
chapter,
|
||||
mangaMainDirectory: mangaMainDirectory,
|
||||
))!;
|
||||
|
||||
final htmlPath = p.join(chapterDirectory.path, "${chapter.name}.html");
|
||||
|
||||
final htmlFile = File(htmlPath);
|
||||
String? htmlContent;
|
||||
if (await htmlFile.exists()) {
|
||||
htmlContent = await htmlFile.readAsString();
|
||||
final temp = parse(htmlContent);
|
||||
temp.getElementsByTagName("script").forEach((el) => el.remove());
|
||||
htmlContent = temp.outerHtml;
|
||||
}
|
||||
final source = getSource(
|
||||
chapter.manga.value!.lang!,
|
||||
|
|
@ -74,8 +79,65 @@ Future<(String, EpubBook?)> getHtmlContent(
|
|||
}
|
||||
|
||||
String _buildHtml(String input) {
|
||||
return '''<div id="readerViewContent"><div style="padding: 2em;">$input</div></div>'''
|
||||
// Decode basic escapes
|
||||
String cleaned = input
|
||||
.replaceAll("\\n", "")
|
||||
.replaceAll("\\t", "")
|
||||
.replaceAll("\\\"", "\"");
|
||||
.replaceAll("\\\"", "\"")
|
||||
.replaceAll("\\'", "'")
|
||||
.replaceAll("\\&quot;", "\"")
|
||||
.replaceAll("&quot;", "\"");
|
||||
|
||||
// Parse HTML to clean it
|
||||
final document = parse(cleaned);
|
||||
|
||||
// Remove unwanted elements
|
||||
document.querySelectorAll('iframe').forEach((el) => el.remove());
|
||||
document.querySelectorAll('script').forEach((el) => el.remove());
|
||||
document.querySelectorAll('[data-aa]').forEach((el) => el.remove());
|
||||
|
||||
// Get cleaned HTML
|
||||
String htmlContent = document.body?.innerHtml ?? cleaned;
|
||||
|
||||
// Decode HTML entities while keeping HTML tags
|
||||
htmlContent = _decodeHtmlEntities(htmlContent);
|
||||
|
||||
return '''<div id="readerViewContent"><div style="padding: 2em;">$htmlContent</div></div>''';
|
||||
}
|
||||
|
||||
/// Decodes HTML character references in [html] and returns the result.
///
/// Handles decimal references (`&#8217;`), hexadecimal references
/// (`&#x2019;` / `&#X2019;`) and a small table of common named entities.
/// Anything else that merely looks like a reference (unknown names, code
/// points above U+10FFFF, unparsable numbers) is left exactly as written.
///
/// Decoding is done in a single pass so that already-escaped text is only
/// unescaped once: `&amp;lt;` becomes `&lt;`, not `<`.
///
/// NOTE(review): `_buildHtml` calls this after it has stripped `<script>` and
/// `<iframe>` elements, so entity-encoded markup such as `&lt;script&gt;` is
/// turned back into literal tags in the returned string — confirm that the
/// consumer of this HTML is fine with that.
String _decodeHtmlEntities(String html) {
  // Keys are entity names without the surrounding '&' and ';'.
  // NOTE(review): the replacement characters for nbsp and the typographic
  // quote entities are taken as they appear in this view (plain space and
  // straight quotes) — confirm against upstream whether U+00A0 / curly
  // quotes were intended.
  const namedEntities = <String, String>{
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'nbsp': ' ',
    'quot': '"',
    'apos': "'",
    'ldquo': '"',
    'rdquo': '"',
    'lsquo': "'",
    'rsquo': "'",
    'mdash': '—',
    'ndash': '–',
    'hellip': '…',
  };

  // Highest valid Unicode code point; String.fromCharCode throws above it.
  const maxCodePoint = 0x10FFFF;

  // group(1): decimal digits, group(2): hex digits, group(3): entity name.
  final reference = RegExp(r'&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([A-Za-z]+));');

  return html.replaceAllMapped(reference, (match) {
    final original = match.group(0) ?? '';
    final decimal = match.group(1);
    final hex = match.group(2);
    final name = match.group(3);

    if (name != null) {
      // Unknown names are kept verbatim rather than dropped.
      return namedEntities[name] ?? original;
    }

    final int? codePoint;
    if (decimal != null) {
      codePoint = int.tryParse(decimal);
    } else if (hex != null) {
      codePoint = int.tryParse(hex, radix: 16);
    } else {
      codePoint = null;
    }

    if (codePoint == null || codePoint > maxCodePoint) {
      return original;
    }
    return String.fromCharCode(codePoint);
  });
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue