revert to old captions/sub handling

This commit is contained in:
Pas 2025-10-24 22:34:44 -06:00
parent fe426a7413
commit d8070e5707
3 changed files with 9 additions and 113 deletions

View file

@ -1,10 +1,7 @@
import { list } from "subsrt-ts";
import { proxiedFetch } from "@/backend/helpers/fetch";
import {
convertSubtitlesToSrt,
fixUTF8Encoding,
} from "@/components/player/utils/captions";
import { convertSubtitlesToSrt } from "@/components/player/utils/captions";
import { CaptionListItem } from "@/stores/player/slices/source";
import { SimpleCache } from "@/utils/cache";
@ -65,14 +62,7 @@ export async function downloadCaption(
}
if (!data) throw new Error("failed to get caption data");
// Ensure the data is in UTF-8 and fix any encoding issues
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(data);
const utf8Data = decoder.decode(utf8Bytes);
const fixedData = fixUTF8Encoding(utf8Data);
const output = convertSubtitlesToSrt(fixedData);
const output = convertSubtitlesToSrt(data);
downloadCache.set(caption.url, output, expirySeconds);
return output;
}
@ -85,24 +75,6 @@ export async function downloadWebVTT(url: string): Promise<string> {
const cached = downloadCache.get(url);
if (cached) return cached;
const response = await fetch(url);
const contentType = response.headers.get("content-type") || "";
const charset = contentType.includes("charset=")
? contentType.split("charset=")[1].toLowerCase()
: "utf-8";
// Get the raw bytes
const buffer = await response.arrayBuffer();
// Decode using the detected charset, defaulting to UTF-8
const decoder = new TextDecoder(charset);
const data = decoder.decode(buffer);
// Ensure the data is in UTF-8 and fix any encoding issues
const encoder = new TextEncoder();
const utf8Bytes = encoder.encode(data);
const utf8Data = decoder.decode(utf8Bytes);
const fixedData = fixUTF8Encoding(utf8Data);
downloadCache.set(url, fixedData, expirySeconds);
return fixedData;
const data = await fetch(url).then((v) => v.text());
return data;
}

View file

@ -15,7 +15,6 @@ import { Input } from "@/components/player/internals/ContextMenu/Input";
import { SelectableLink } from "@/components/player/internals/ContextMenu/Links";
import {
captionIsVisible,
fixUTF8Encoding,
parseSubtitles,
} from "@/components/player/utils/captions";
import { useOverlayRouter } from "@/hooks/useOverlayRouter";
@ -205,15 +204,7 @@ export function CustomCaptionOption() {
reader.addEventListener("load", (event) => {
if (!event.target || typeof event.target.result !== "string")
return;
// Ensure the data is in UTF-8 and fix any encoding issues
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(event.target.result);
const utf8Data = decoder.decode(utf8Bytes);
const fixedData = fixUTF8Encoding(utf8Data);
const converted = convert(fixedData, "srt");
const converted = convert(event.target.result, "srt");
setCaption({
language: "custom",
srtData: converted,
@ -309,13 +300,7 @@ export function CaptionsView({
reader.addEventListener("load", (e) => {
if (!e.target || typeof e.target.result !== "string") return;
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(e.target.result);
const utf8Data = decoder.decode(utf8Bytes);
const fixedData = fixUTF8Encoding(utf8Data);
const converted = convert(fixedData, "srt");
const converted = convert(e.target.result, "srt");
setCaption({
language: "custom",
@ -324,7 +309,7 @@ export function CaptionsView({
});
});
reader.readAsText(firstFile, "utf-8");
reader.readAsText(firstFile);
}
// Render subtitle option

View file

@ -8,63 +8,6 @@ import { CaptionListItem } from "@/stores/player/slices/source";
export type CaptionCueType = ContentCaption;
export const sanitize = DOMPurify.sanitize;
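// Maps mojibake sequences (UTF-8 text that was mis-decoded, e.g. "Ã¥" for "å") back to the intended characters.
// NOTE(review): in this rendering most keys look identical to their values, which suggests the page re-encoded them — confirm against the raw file.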
const utf8Map: Record<string, string> = {
"ä": "ä",
"Ä": "Ä",
"ä": "ä",
"Ä": "Ä",
"ö": "ö",
"ö": "ö",
"Ã¥": "å",
"Ã¥": "å",
"é": "é",
"é": "é",
ú: "ú",
ú: "ú",
"ñ": "ñ",
"ñ": "ñ",
"á": "á",
"á": "á",
"í": "í",
"í": "í",
"ó": "ó",
"ó": "ó",
"ü": "ü",
"ü": "ü",
"ç": "ç",
"ç": "ç",
"è": "è",
"è": "è",
"ì": "ì",
"ì": "ì",
"ò": "ò",
"ò": "ò",
"ù": "ù",
"ù": "ù",
ÃÂ: "à",
Ã: "à",
"Â": "",
Â: "",
"Â ": "",
};
/**
 * Repairs corrupted special characters and accents in subtitle text.
 *
 * Every key of `utf8Map` that occurs in the text is replaced by its mapped
 * value. Entries are applied one after another in the map's key order, so a
 * later entry also sees the output of the earlier ones.
 *
 * Example:
 * Input: "HyvÃ¤ on, ohjelma oli tÃ¤ssÃ¤."
 * Output: "Hyvä on, ohjelma oli tässä."
 *
 * @param text - Subtitle text that may contain corrupted sequences.
 * @returns The text with every mapped sequence replaced.
 */
export function fixUTF8Encoding(text: string): string {
  return Object.entries(utf8Map).reduce(
    // split/join replaces all occurrences of the corrupted sequence literally.
    (repaired, [corrupted, replacement]) =>
      repaired.split(corrupted).join(replacement),
    text,
  );
}
export function captionIsVisible(
start: number,
end: number,
@ -88,9 +31,7 @@ export function convertSubtitlesToVtt(text: string): string {
if (textTrimmed === "") {
throw new Error("Given text is empty");
}
// Fix UTF-8 encoding issues before conversion
const fixedText = fixUTF8Encoding(textTrimmed);
const vtt = convert(fixedText, "vtt");
const vtt = convert(textTrimmed, "vtt");
if (detect(vtt) === "") {
throw new Error("Invalid subtitle format");
}
@ -102,9 +43,7 @@ export function convertSubtitlesToSrt(text: string): string {
if (textTrimmed === "") {
throw new Error("Given text is empty");
}
// Fix UTF-8 encoding issues before conversion
const fixedText = fixUTF8Encoding(textTrimmed);
const srt = convert(fixedText, "srt");
const srt = convert(textTrimmed, "srt");
if (detect(srt) === "") {
throw new Error("Invalid subtitle format");
}