From 2a69d77c0bba541e02faac43e1eac7fbb2a6074f Mon Sep 17 00:00:00 2001 From: Pas <74743263+Pasithea0@users.noreply.github.com> Date: Mon, 2 Jun 2025 12:51:11 -0600 Subject: [PATCH] try to convert subtitles to utf8 --- src/backend/helpers/subs.ts | 48 +++++++++++++++++-- .../player/atoms/settings/CaptionsView.tsx | 19 ++++++-- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/backend/helpers/subs.ts b/src/backend/helpers/subs.ts index 01bf4f7e..a49aa247 100644 --- a/src/backend/helpers/subs.ts +++ b/src/backend/helpers/subs.ts @@ -40,14 +40,35 @@ export async function downloadCaption( data = extensionResponse.response.body; } else { - data = await proxiedFetch(caption.url, { responseType: "text" }); + data = await proxiedFetch(caption.url, { + responseType: "text", + headers: { + "Accept-Charset": "utf-8", + }, + }); } } else { - data = await fetch(caption.url).then((v) => v.text()); + const response = await fetch(caption.url); + const contentType = response.headers.get("content-type") || ""; + const charset = contentType.includes("charset=") + ? contentType.split("charset=")[1].toLowerCase() + : "utf-8"; + + // Get the raw bytes + const buffer = await response.arrayBuffer(); + // Decode using the detected charset, defaulting to UTF-8 + const decoder = new TextDecoder(charset); + data = decoder.decode(buffer); } if (!data) throw new Error("failed to get caption data"); - const output = convertSubtitlesToSrt(data); + // Ensure the data is in UTF-8 + const encoder = new TextEncoder(); + const decoder = new TextDecoder("utf-8"); + const utf8Bytes = encoder.encode(data); + const utf8Data = decoder.decode(utf8Bytes); + + const output = convertSubtitlesToSrt(utf8Data); downloadCache.set(caption.url, output, expirySeconds); return output; } @@ -60,6 +81,23 @@ export async function downloadWebVTT(url: string): Promise { const cached = downloadCache.get(url); if (cached) return cached; - const data = await fetch(url).then((v) => v.text()); - return data; + const response = await fetch(url); + const contentType = response.headers.get("content-type") || ""; + const charset = contentType.includes("charset=") + ? contentType.split("charset=")[1].toLowerCase() + : "utf-8"; + + // Get the raw bytes + const buffer = await response.arrayBuffer(); + // Decode using the detected charset, defaulting to UTF-8 + const decoder = new TextDecoder(charset); + const data = decoder.decode(buffer); + + // Ensure the data is in UTF-8 + const encoder = new TextEncoder(); + const utf8Bytes = encoder.encode(data); + const utf8Data = decoder.decode(utf8Bytes); + + downloadCache.set(url, utf8Data, expirySeconds); + return utf8Data; } diff --git a/src/components/player/atoms/settings/CaptionsView.tsx b/src/components/player/atoms/settings/CaptionsView.tsx index ff67f422..9350d3ab 100644 --- a/src/components/player/atoms/settings/CaptionsView.tsx +++ b/src/components/player/atoms/settings/CaptionsView.tsx @@ -70,7 +70,14 @@ export function CustomCaptionOption() { reader.addEventListener("load", (event) => { if (!event.target || typeof event.target.result !== "string") return; - const converted = convert(event.target.result, "srt"); + + // Ensure the data is in UTF-8 + const encoder = new TextEncoder(); + const decoder = new TextDecoder("utf-8"); + const utf8Bytes = encoder.encode(event.target.result); + const utf8Data = decoder.decode(utf8Bytes); + + const converted = convert(utf8Data, "srt"); setCaption({ language: "custom", srtData: converted, @@ -116,7 +123,13 @@ export function CaptionsView({ reader.addEventListener("load", (e) => { if (!e.target || typeof e.target.result !== "string") return; - const converted = convert(e.target.result, "srt"); + // Ensure the data is in UTF-8 + const encoder = new TextEncoder(); + const decoder = new TextDecoder("utf-8"); + const utf8Bytes = encoder.encode(e.target.result); + const utf8Data = decoder.decode(utf8Bytes); + + const converted = convert(utf8Data, "srt"); setCaption({ language: "custom", @@ -125,7 +138,7 @@ export function CaptionsView({ }); }); - reader.readAsText(firstFile); + reader.readAsText(firstFile, "utf-8"); } const selectedLanguagePretty = selectedCaptionLanguage