Try to convert subtitles to UTF-8

This commit is contained in:
Pas 2025-06-02 12:51:11 -06:00
parent fcba122399
commit 2a69d77c0b
2 changed files with 59 additions and 8 deletions

View file

@ -40,14 +40,35 @@ export async function downloadCaption(
data = extensionResponse.response.body;
} else {
data = await proxiedFetch<string>(caption.url, { responseType: "text" });
data = await proxiedFetch<string>(caption.url, {
responseType: "text",
headers: {
"Accept-Charset": "utf-8",
},
});
}
} else {
data = await fetch(caption.url).then((v) => v.text());
const response = await fetch(caption.url);
const contentType = response.headers.get("content-type") || "";
const charset = contentType.includes("charset=")
? contentType.split("charset=")[1].toLowerCase()
: "utf-8";
// Get the raw bytes
const buffer = await response.arrayBuffer();
// Decode using the detected charset, defaulting to UTF-8
const decoder = new TextDecoder(charset);
data = decoder.decode(buffer);
}
if (!data) throw new Error("failed to get caption data");
const output = convertSubtitlesToSrt(data);
// Ensure the data is in UTF-8
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(data);
const utf8Data = decoder.decode(utf8Bytes);
const output = convertSubtitlesToSrt(utf8Data);
downloadCache.set(caption.url, output, expirySeconds);
return output;
}
/**
 * Downloads a WebVTT file and returns its contents as text.
 *
 * A truthy entry in `downloadCache` for `url` is returned directly without
 * hitting the network. Otherwise the response body is read as raw bytes and
 * decoded with the charset declared in the `Content-Type` header, falling
 * back to UTF-8 when no charset is declared or the declared label is not
 * supported by `TextDecoder`. The decoded text is cached under `url` for
 * `expirySeconds` before being returned.
 *
 * @param url - Location of the WebVTT resource.
 * @returns The decoded subtitle text.
 */
export async function downloadWebVTT(url: string): Promise<string> {
  const cached = downloadCache.get(url);
  if (cached) return cached;

  const response = await fetch(url);
  const contentType = response.headers.get("content-type") ?? "";

  // Extract only the charset token: the header may carry further parameters
  // (`; boundary=...`), quote the value, or use different letter casing.
  const charsetMatch = /charset\s*=\s*"?([^";\s]+)/i.exec(contentType);
  const label = charsetMatch?.[1] ?? "utf-8";

  let decoder: TextDecoder;
  try {
    decoder = new TextDecoder(label);
  } catch {
    // TextDecoder throws a RangeError for labels it does not recognise;
    // a bogus header should not make the whole download fail.
    decoder = new TextDecoder("utf-8");
  }

  // Decode exactly once. The resulting JS string is already Unicode, so no
  // further "convert to UTF-8" step is needed; re-decoding re-encoded bytes
  // with a non-UTF-8 decoder would corrupt every non-ASCII character.
  const data = decoder.decode(await response.arrayBuffer());

  downloadCache.set(url, data, expirySeconds);
  return data;
}

View file

@ -70,7 +70,14 @@ export function CustomCaptionOption() {
reader.addEventListener("load", (event) => {
if (!event.target || typeof event.target.result !== "string")
return;
const converted = convert(event.target.result, "srt");
// Ensure the data is in UTF-8
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(event.target.result);
const utf8Data = decoder.decode(utf8Bytes);
const converted = convert(utf8Data, "srt");
setCaption({
language: "custom",
srtData: converted,
@ -116,7 +123,13 @@ export function CaptionsView({
reader.addEventListener("load", (e) => {
if (!e.target || typeof e.target.result !== "string") return;
const converted = convert(e.target.result, "srt");
// Ensure the data is in UTF-8
const encoder = new TextEncoder();
const decoder = new TextDecoder("utf-8");
const utf8Bytes = encoder.encode(e.target.result);
const utf8Data = decoder.decode(utf8Bytes);
const converted = convert(utf8Data, "srt");
setCaption({
language: "custom",
@ -125,7 +138,7 @@ export function CaptionsView({
});
});
reader.readAsText(firstFile);
reader.readAsText(firstFile, "utf-8");
}
const selectedLanguagePretty = selectedCaptionLanguage