From 5aea7724775fc654032e90860e49a245e6c5ce55 Mon Sep 17 00:00:00 2001 From: vlOd2 <66838724+vlOd2@users.noreply.github.com> Date: Thu, 25 Dec 2025 22:52:50 +0200 Subject: [PATCH] Prepare for captions translation --- .vscode/settings.json | 8 +- pnpm-lock.yaml | 8 +- .../player/utils/captionstranslation.ts | 201 ++++++++++++++++++ src/pages/developer/TestView.tsx | 70 +++++- 4 files changed, 278 insertions(+), 9 deletions(-) create mode 100644 src/components/player/utils/captionstranslation.ts diff --git a/.vscode/settings.json b/.vscode/settings.json index 5d671d55..cb78ef1f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,7 +5,13 @@ "[json]": { "editor.defaultFormatter": "esbenp.prettier-vscode" }, - "[typescriptreact]": { + "[jsonc]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "[type.scriptreact]": { "editor.defaultFormatter": "dbaeumer.vscode-eslint" + }, + "[javascript]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2aaee3f8..48ff924f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -44,7 +44,7 @@ importers: version: 1.8.0 '@p-stream/providers': specifier: github:p-stream/providers#production - version: https://codeload.github.com/p-stream/providers/tar.gz/5fa33694229da0506e7a265ff041acdb43e25ff0 + version: https://codeload.github.com/p-stream/providers/tar.gz/6a94f978c64ec025171246b7b27e5867bdf21ed1 '@plasmohq/messaging': specifier: ^0.6.2 version: 0.6.2(react@18.3.1) @@ -1207,8 +1207,8 @@ packages: resolution: {integrity: sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==} engines: {node: '>=12.4.0'} - '@p-stream/providers@https://codeload.github.com/p-stream/providers/tar.gz/5fa33694229da0506e7a265ff041acdb43e25ff0': - resolution: {tarball: https://codeload.github.com/p-stream/providers/tar.gz/5fa33694229da0506e7a265ff041acdb43e25ff0} + '@p-stream/providers@https://codeload.github.com/p-stream/providers/tar.gz/6a94f978c64ec025171246b7b27e5867bdf21ed1': + resolution: {tarball: https://codeload.github.com/p-stream/providers/tar.gz/6a94f978c64ec025171246b7b27e5867bdf21ed1} version: 3.2.0 '@pkgjs/parseargs@0.11.0': @@ -5523,7 +5523,7 @@ snapshots: '@nolyfill/is-core-module@1.0.39': {} - '@p-stream/providers@https://codeload.github.com/p-stream/providers/tar.gz/5fa33694229da0506e7a265ff041acdb43e25ff0': + '@p-stream/providers@https://codeload.github.com/p-stream/providers/tar.gz/6a94f978c64ec025171246b7b27e5867bdf21ed1': dependencies: abort-controller: 3.0.0 cheerio: 1.0.0-rc.12 diff --git a/src/components/player/utils/captionstranslation.ts b/src/components/player/utils/captionstranslation.ts new file mode 100644 index 00000000..85b729df --- /dev/null +++ b/src/components/player/utils/captionstranslation.ts @@ -0,0 +1,201 @@ +import subsrt from "subsrt-ts"; +import { Caption, ContentCaption } from "subsrt-ts/dist/types/handler"; + +const API_URL = + "https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&oe=UTF-8&sl=auto&tl={TARGET_LANG}&q="; +const RETRY_COUNT = 3; +const FETCH_RATE = 100; +const SUBTITLES_CACHE: Map = new Map< + string, + ArrayBuffer +>(); + +async function translateText(text: string): Promise { + if (!text) { + return ""; + } + + const response = await ( + await fetch( + `${API_URL.replace("TARGET_LANG", "ro")}${encodeURIComponent(text)}`, + { + method: "GET", + headers: { + Accept: "application/json", + }, + }, + ) + ).json(); + + if (!response) { + throw new Error("Empty response"); + } + + return (response.sentences as any[]) + .map((s: any) => s.trans as string) + .join(""); +} + +async function translateCaption(caption: ContentCaption): Promise { + (caption as any).oldText = caption.text; + let text: string | undefined; + for (let i = 0; i < RETRY_COUNT; i += 1) { + try { + text = await translateText(caption.text.replace("\n", "
")); + if (text) { + text = text.replace("
", "\n"); + break; + } + } catch (error) { + console.warn("[CTR] Re-trying caption translation", caption, error); + } + } + if (!text) { + console.error("[CTR] Failed to translate caption"); + caption.text = `(CAPTION COULD NOT BE TRANSLATED)\n${caption.text}}`; + return false; + } + caption.text = text.trim(); + return true; +} + +async function translateCaptions(captions: ContentCaption[]): Promise { + console.log("[CTR] Translating", captions.length, "captions"); + try { + const results: boolean[] = await Promise.all( + captions.map((c) => translateCaption(c)), + ); + + const successCount = results.filter((v) => v).length; + const failedCount = results.length - successCount; + const successPercentange = (successCount / results.length) * 100; + const failedPercentange = (failedCount / results.length) * 100; + console.log( + "[CTR] Done translating captions", + results.length, + successCount, + failedCount, + successPercentange, + failedPercentange, + ); + + if (failedPercentange > successPercentange) { + throw new Error("Success percentage is not acceptable"); + } + } catch (error) { + console.error( + "[CTR] Could not translate", + captions.length, + "captions", + error, + ); + return false; + } + return true; +} + +function tryUseCached( + caption: ContentCaption, + cache: Map, +): boolean { + const text: string | undefined = cache.get(caption.text); + if (text) { + caption.text = text; + return true; + } + return false; +} + +async function translateSRTData(data: string): Promise { + let captions: Caption[]; + try { + captions = subsrt.parse(data); + } catch (error) { + console.error("[CTR] Failed to parse subtitle data", error); + return undefined; + } + + let translatedCaptions: Caption[] | undefined = []; + const contentCaptions: ContentCaption[] = []; + const translatedCache: Map = new Map(); + + for (const caption of captions) { + translatedCaptions.push(caption); + if (caption.type !== "caption") { + continue; + } + caption.text = caption.text + .trim() + .replace("\r\n", "\n") + .replace("\r", "\n"); + contentCaptions.push(caption); + } + + for (let i = 0; i < contentCaptions.length; i += 1) { + if (tryUseCached(contentCaptions[i], translatedCache)) { + continue; + } + const batch: ContentCaption[] = [contentCaptions[i]]; + + let j; + for (j = 1; j < FETCH_RATE; j += 1) { + if (i + j >= contentCaptions.length) { + break; + } + if (tryUseCached(contentCaptions[i + j], translatedCache)) { + continue; + } + batch.push(contentCaptions[i + j]); + } + i += j; + + if (!(await translateCaptions(batch))) { + translatedCaptions = undefined; + break; + } + + batch.forEach((c) => translatedCache.set((c as any).oldText!, c.text)); + } + + return translatedCaptions + ? subsrt.build(translatedCaptions, { format: "srt" }) + : undefined; +} + +async function compressStr(string: string): Promise { + const byteArray = new TextEncoder().encode(string); + const cs = new CompressionStream("deflate"); + const writer = cs.writable.getWriter(); + writer.write(byteArray); + writer.close(); + return new Response(cs.readable).arrayBuffer(); +} + +async function decompressStr(byteArray: ArrayBuffer): Promise { + const cs = new DecompressionStream("deflate"); + const writer = cs.writable.getWriter(); + writer.write(byteArray); + writer.close(); + return new Response(cs.readable).arrayBuffer().then(function (arrayBuffer) { + return new TextDecoder().decode(arrayBuffer); + }); +} + +export async function translateSubtitles( + id: string, + srtData: string, +): Promise { + const cachedData: ArrayBuffer | undefined = SUBTITLES_CACHE.get(id); + if (cachedData) { + console.log("[CTR] Using cached translation for", id); + return decompressStr(cachedData); + } + console.log("[CTR] Translating", id); + const translatedData: string | undefined = await translateSRTData(srtData); + if (!translatedData) { + return undefined; + } + console.log("[CTR] Caching translation for", id); + SUBTITLES_CACHE.set(id, await compressStr(translatedData)); + return translatedData; +} diff --git a/src/pages/developer/TestView.tsx b/src/pages/developer/TestView.tsx index d6f5a053..375ed131 100644 --- a/src/pages/developer/TestView.tsx +++ b/src/pages/developer/TestView.tsx @@ -1,12 +1,74 @@ -import { useState } from "react"; +import { useCallback, useState } from "react"; import { Button } from "@/components/buttons/Button"; +import { usePlayer } from "@/components/player/hooks/usePlayer"; +import { PlaybackErrorPart } from "@/pages/parts/player/PlaybackErrorPart"; +import { PlayerPart } from "@/pages/parts/player/PlayerPart"; +import { + CaptionListItem, + PlayerMeta, + playerStatus, +} from "@/stores/player/slices/source"; +import { SourceSliceSource } from "@/stores/player/utils/qualities"; + +const subtitlesTestMeta: PlayerMeta = { + type: "movie", + title: "Subtitles Test", + releaseYear: 2024, + tmdbId: "0", +}; + +const subtitlesTestSource: SourceSliceSource = { + type: "hls", + url: "http://localhost:8000/media/master.m3u8", +}; + +const subtitlesTestSubs: CaptionListItem[] = [ + { + id: "English", + language: "en", + url: "http://localhost:8000/subs/en.srt", + needsProxy: false, + }, + { + id: "Romanian", + language: "ro", + url: "http://localhost:8000/subs/ro.srt", + needsProxy: false, + }, +]; // mostly empty view, add whatever you need export default function TestView() { - const [val, setVal] = useState(false); + const player = usePlayer(); + const [showPlayer, setShowPlayer] = useState(false); + const [shouldCrash, setShouldCrash] = useState(false); - if (val) throw new Error("I crashed"); + if (shouldCrash) { + throw new Error("I crashed"); + } - return ; + const subtitlesTest = useCallback(async () => { + setShowPlayer(true); + player.reset(); + await new Promise((r) => { + setTimeout(r, 100); + }); + player.setShouldStartFromBeginning(true); + player.setMeta(subtitlesTestMeta); + player.playMedia(subtitlesTestSource, subtitlesTestSubs, null); + }, [player]); + + return showPlayer ? ( + + {player && (player as any).status === playerStatus.PLAYBACK_ERROR ? ( + + ) : null} + + ) : ( + <> + + + + ); }