fix novel download and improve HTML content processing

This commit is contained in:
Moustapha Kodjo Amadou 2025-11-08 23:37:22 +01:00
parent dee1a8635c
commit 93298c7376
3 changed files with 105 additions and 38 deletions

View file

@ -12,7 +12,11 @@ class PageUrl {
headers: (json['headers'] as Map?)?.toMapStringString,
);
}
/// Serialises this page into a JSON-compatible map.
///
/// The map carries the `url`, `headers` and `fileName` fields under keys of
/// the same names.
Map<String, dynamic> toJson() => {
  'url': url,
  'headers': headers,
  'fileName': fileName,
};
@override
String toString() {

View file

@ -4,6 +4,7 @@ import 'dart:ui';
import 'package:connectivity_plus/connectivity_plus.dart';
import 'package:flutter/widgets.dart';
import 'package:isar_community/isar.dart';
import 'package:mangayomi/eval/lib.dart';
import 'package:mangayomi/eval/model/m_bridge.dart';
import 'package:mangayomi/models/manga.dart';
import 'package:mangayomi/models/page.dart';
@ -13,6 +14,7 @@ import 'package:mangayomi/models/download.dart';
import 'package:mangayomi/models/settings.dart';
import 'package:mangayomi/models/video.dart';
import 'package:mangayomi/modules/manga/download/providers/convert_to_cbz.dart';
import 'package:mangayomi/modules/more/settings/browse/providers/browse_state_provider.dart';
import 'package:mangayomi/modules/more/settings/downloads/providers/downloads_state_provider.dart';
import 'package:mangayomi/providers/l10n_providers.dart';
import 'package:mangayomi/providers/storage_provider.dart';
@ -27,6 +29,7 @@ import 'package:mangayomi/utils/extensions/chapter.dart';
import 'package:mangayomi/utils/extensions/string_extensions.dart';
import 'package:mangayomi/utils/headers.dart';
import 'package:mangayomi/utils/reg_exp_matcher.dart';
import 'package:mangayomi/utils/utils.dart';
import 'package:path/path.dart' as p;
import 'package:riverpod_annotation/riverpod_annotation.dart';
part 'download_provider.g.dart';
@ -73,6 +76,7 @@ Future<void> downloadChapter(
);
List<PageUrl> pageUrls = [];
PageUrl? novelPage;
List<PageUrl> pages = [];
final StorageProvider storageProvider = StorageProvider();
await storageProvider.requestPermission();
@ -225,18 +229,21 @@ Future<void> downloadChapter(
}
});
} else if (itemType == ItemType.novel && chapter.url != null) {
final cookie = MClient.getCookiesPref(chapter.url!);
final manga = chapter.manga.value!;
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
final chapterUrl = "${source.baseUrl}${chapter.url!.getUrlWithoutDomain}";
final cookie = MClient.getCookiesPref(chapterUrl);
final headers = htmlHeader;
if (cookie.isNotEmpty) {
final userAgent = isar.settings.getSync(227)!.userAgent!;
headers.addAll(cookie);
headers[HttpHeaders.userAgentHeader] = userAgent;
}
final res = await http.get(Uri.parse(chapter.url!), headers: headers);
final res = await http.get(Uri.parse(chapterUrl), headers: headers);
if (res.headers.containsKey("Location")) {
pageUrls = [PageUrl(res.headers["Location"]!)];
novelPage = PageUrl(res.headers["Location"]!);
} else {
pageUrls = [PageUrl(chapter.url!)];
novelPage = PageUrl(chapterUrl);
}
isOk = true;
}
@ -324,19 +331,6 @@ Future<void> downloadChapter(
),
);
}
} else {
final file = File(
p.join(chapterDirectory.path, "$chapterName.html"),
);
if (!file.existsSync()) {
pages.add(
PageUrl(
page.url.trim().trimLeft().trimRight(),
headers: pageHeaders,
fileName: p.join(chapterDirectory.path, "$chapterName.html"),
),
);
}
}
}
}
@ -344,18 +338,7 @@ Future<void> downloadChapter(
if (pages.isEmpty && pageUrls.isNotEmpty) {
await processConvert();
savePageUrls();
final download = Download(
id: chapter.id,
succeeded: 0,
failed: 0,
total: 0,
isDownload: true,
isStartDownload: false,
);
isar.writeTxnSync(() {
isar.downloads.putSync(download..chapter.value = chapter);
});
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
} else {
savePageUrls();
await MDownloader(
@ -367,6 +350,24 @@ Future<void> downloadChapter(
setProgress(progress);
});
}
} else if (itemType == ItemType.novel) {
final file = File(p.join(chapterDirectory.path, "$chapterName.html"));
if (!file.existsSync() && novelPage != null) {
final source = getSource(manga.lang!, manga.source!, manga.sourceId)!;
p.join(chapterDirectory.path, "$chapterName.html");
final html = await getExtensionService(
source,
ref.read(androidProxyServerStateProvider),
).getHtmlContent(chapter.manga.value!.name!, chapter.url!);
if (html.isNotEmpty) {
await file.writeAsString(html);
await setProgress(
DownloadProgress(1, 1, itemType, isCompleted: true),
);
}
} else {
await setProgress(DownloadProgress(1, 1, itemType, isCompleted: true));
}
} else if (hasM3U8File) {
await m3u8Downloader?.download((progress) {
setProgress(progress);

View file

@ -1,5 +1,5 @@
import 'dart:io';
import 'package:path/path.dart' as p;
import 'package:epubx/epubx.dart';
import 'package:html/parser.dart';
import 'package:mangayomi/eval/lib.dart';
@ -36,15 +36,20 @@ Future<(String, EpubBook?)> getHtmlContent(
result = (_buildHtml("Local epub file not found!"), null);
}
final storageProvider = StorageProvider();
final mangaDirectory = await storageProvider.getMangaMainDirectory(chapter);
final htmlPath = "${mangaDirectory!.path}${chapter.name}.html";
final mangaMainDirectory = await storageProvider.getMangaMainDirectory(
chapter,
);
final chapterDirectory = (await storageProvider.getMangaChapterDirectory(
chapter,
mangaMainDirectory: mangaMainDirectory,
))!;
final htmlPath = p.join(chapterDirectory.path, "${chapter.name}.html");
final htmlFile = File(htmlPath);
String? htmlContent;
if (await htmlFile.exists()) {
htmlContent = await htmlFile.readAsString();
final temp = parse(htmlContent);
temp.getElementsByTagName("script").forEach((el) => el.remove());
htmlContent = temp.outerHtml;
}
final source = getSource(
chapter.manga.value!.lang!,
@ -74,8 +79,65 @@ Future<(String, EpubBook?)> getHtmlContent(
}
/// Cleans [input] and wraps it in the `readerViewContent` container markup.
///
/// The text is unescaped, parsed as HTML, stripped of `iframe`, `script` and
/// `[data-aa]` elements, and entity-decoded via [_decodeHtmlEntities] before
/// being embedded in the wrapper `div`s.
String _buildHtml(String input) {
  // These patterns match *literal* backslash sequences (a backslash followed
  // by `n`, `t`, `"` or `'`), not real control characters.
  final cleaned = input
      .replaceAll("\\n", "")
      .replaceAll("\\t", "")
      .replaceAll("\\\"", "\"")
      .replaceAll("\\'", "'")
      .replaceAll("\\&quot;", "\"")
      .replaceAll("&quot;", "\"");

  // Parse so unwanted elements can be dropped structurally rather than by
  // string matching.
  final document = parse(cleaned);
  for (final selector in const ['iframe', 'script', '[data-aa]']) {
    // querySelectorAll returns a list, so removing nodes while walking it
    // does not disturb the iteration.
    for (final element in document.querySelectorAll(selector)) {
      element.remove();
    }
  }

  // Fall back to the unparsed text when the document has no body.
  final bodyHtml = document.body?.innerHtml ?? cleaned;

  // Decode entities in the serialised markup; tags are left in place.
  final htmlContent = _decodeHtmlEntities(bodyHtml);
  return '''<div id="readerViewContent"><div style="padding: 2em;">$htmlContent</div></div>''';
}
/// Decodes numeric and common named HTML entities found in [html].
///
/// Handles decimal references (`&#8220;`), hexadecimal references
/// (`&#x2019;`) and the named entities listed in the table below. Every
/// entity is decoded exactly once in a single pass, so `&amp;lt;` becomes the
/// text `&lt;` rather than `<`. Unknown names and numeric references that are
/// not valid Unicode code points are left untouched.
///
/// Returns the decoded string; all other text, including tags, is unchanged.
String _decodeHtmlEntities(String html) {
  // Replacement text is written with \u escapes so the characters cannot be
  // lost or altered by editors and tooling that normalise typographic
  // punctuation.
  const namedEntities = {
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'nbsp': ' ',
    'quot': '"',
    'apos': "'",
    'ldquo': '\u201C',
    'rdquo': '\u201D',
    'lsquo': '\u2018',
    'rsquo': '\u2019',
    'mdash': '\u2014',
    'ndash': '\u2013',
    'hellip': '\u2026',
  };

  // Group 1 is the entity body: `#x<hex>`, `#<decimal>` or a name.
  final entityPattern = RegExp(r'&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);');

  return html.replaceAllMapped(entityPattern, (match) {
    final original = match.group(0)!;
    final body = match.group(1)!;

    if (!body.startsWith('#')) {
      // Names are case-sensitive; anything not in the table stays as written.
      return namedEntities[body] ?? original;
    }

    final isHex = body[1] == 'x' || body[1] == 'X';
    final codePoint = isHex
        ? int.tryParse(body.substring(2), radix: 16)
        : int.tryParse(body.substring(1));

    // String.fromCharCode throws for values above the Unicode maximum, so
    // out-of-range references are kept verbatim instead.
    if (codePoint == null || codePoint > 0x10FFFF) return original;
    return String.fromCharCode(codePoint);
  });
}