import 'dart:io';
import 'package:mangayomi/src/rust/api/epub.dart';
import 'package:path/path.dart' as p;
import 'package:html/parser.dart';
import 'package:mangayomi/eval/lib.dart';
import 'package:mangayomi/models/chapter.dart';
import 'package:mangayomi/modules/more/settings/browse/providers/browse_state_provider.dart';
import 'package:mangayomi/providers/storage_provider.dart';
import 'package:mangayomi/utils/utils.dart';
import 'package:riverpod_annotation/riverpod_annotation.dart';
part 'get_html_content.g.dart';
@riverpod
Future<(String, EpubNovel?)> getHtmlContent(
Ref ref, {
required Chapter chapter,
}) async {
final keepAlive = ref.keepAlive();
(String, EpubNovel?)? result;
try {
if (!chapter.manga.isLoaded) {
chapter.manga.loadSync();
}
if (chapter.archivePath != null && chapter.archivePath!.isNotEmpty) {
try {
final book = await parseEpubFromPath(
epubPath: chapter.archivePath!,
fullData: true,
);
String htmlContent = "";
for (var subChapter in book.chapters) {
htmlContent += "\n
\n${subChapter.content}";
}
result = (_buildHtml(htmlContent), book);
} catch (_) {}
result ??= (_buildHtml("Local epub file not found!"), null);
}
if (result == null) {
final storageProvider = StorageProvider();
final mangaMainDirectory = await storageProvider.getMangaMainDirectory(
chapter,
);
final chapterDirectory = (await storageProvider.getMangaChapterDirectory(
chapter,
mangaMainDirectory: mangaMainDirectory,
))!;
final htmlPath = p.join(chapterDirectory.path, "${chapter.name}.html");
final htmlFile = File(htmlPath);
String? htmlContent;
if (await htmlFile.exists()) {
htmlContent = await htmlFile.readAsString();
}
final source = getSource(
chapter.manga.value!.lang!,
chapter.manga.value!.source!,
chapter.manga.value!.sourceId,
);
final proxyServer = ref.read(androidProxyServerStateProvider);
final html = await withExtensionService(source!, proxyServer, (
service,
) async {
if (htmlContent != null) {
return await service.cleanHtmlContent(htmlContent);
} else {
return await service.getHtmlContent(
chapter.manga.value!.name!,
chapter.url!,
);
}
});
result = (_buildHtml(html.substring(1, html.length - 1)), null);
}
keepAlive.close();
return result;
} catch (e) {
keepAlive.close();
rethrow;
}
}
String _buildHtml(String input) {
// Decode basic escapes
String cleaned = input
.replaceAll("\\n", "")
.replaceAll("\\t", "")
.replaceAll("\\\"", "\"")
.replaceAll("\\'", "'")
.replaceAll("\\"", "\"")
.replaceAll(""", "\"");
// Parse HTML to clean it
final document = parse(cleaned);
// Remove unwanted elements (ads, tracking, etc.)
document.querySelectorAll('iframe').forEach((el) => el.remove());
document.querySelectorAll('script').forEach((el) => el.remove());
document.querySelectorAll('[data-aa]').forEach((el) => el.remove());
// Improve styles for EPUB tables
document.querySelectorAll('table').forEach((table) {
table.attributes['style'] =
'${table.attributes['style'] ?? ''} border-collapse: collapse; width: 100%; margin: 10px 0;';
});
document.querySelectorAll('td, th').forEach((cell) {
cell.attributes['style'] =
'${cell.attributes['style'] ?? ''} border: 1px solid #ddd; padding: 8px;';
});
// Improve citations/blockquotes
document.querySelectorAll('blockquote').forEach((quote) {
quote.attributes['style'] =
'${quote.attributes['style'] ?? ''} border-left: 4px solid #ccc; padding-left: 15px; margin: 10px 0; font-style: italic;';
});
// Get cleaned HTML
String htmlContent = document.body?.innerHtml ?? cleaned;
// Decode HTML entities while keeping HTML tags
htmlContent = _decodeHtmlEntities(htmlContent);
return '''$htmlContent
''';
}
String _decodeHtmlEntities(String html) {
// Decode numeric HTML entities (“, ’, etc.)
String decoded = html.replaceAllMapped(RegExp(r'(\d+);'), (match) {
final charCode = int.tryParse(match.group(1)!);
return charCode != null ? String.fromCharCode(charCode) : match.group(0)!;
});
// Decode hexadecimal HTML entities (’, etc.)
decoded = decoded.replaceAllMapped(RegExp(r'([0-9a-fA-F]+);'), (match) {
final charCode = int.tryParse(match.group(1)!, radix: 16);
return charCode != null ? String.fromCharCode(charCode) : match.group(0)!;
});
// Decode common named HTML entities
final entities = {
'&': '&',
'<': '<',
'>': '>',
' ': ' ',
'"': '"',
''': "'",
'“': '"',
'”': '"',
'‘': ''',
'’': ''',
'—': '—',
'–': '–',
'…': '…',
};
entities.forEach((entity, replacement) {
decoded = decoded.replaceAll(entity, replacement);
});
return decoded;
}