/**
 * Strips ASS/SSA styling override tags (`\an1`, `\pos(...)`, `\i1`, ...) that
 * leak into caption text, leaving only the words that should be displayed.
 *
 * Example: `\an1\pos(234,432)\i1}♪ The Simpsons ♪` becomes `♪ The Simpsons ♪`.
 *
 * Handled, in order:
 *  1. Complete override blocks such as `{\an8}` or `{\pos(10,20)\i1}`.
 *  2. Blocks whose opening brace was lost, e.g. `\an1\pos(234,432)\i1}`.
 *  3. The ASS line-break / hard-space escapes `\N`, `\n`, `\h`, which become a
 *     single space so neighbouring words are not glued together.
 *  4. Any remaining bare tag (`\i1`, `\pos(1,2)`) that is not wrapped in braces.
 *
 * Braces and whitespace that are not part of an override tag are preserved.
 *
 * @param text - Raw caption text (may already contain HTML such as `<br />`).
 * @returns The caption text with override tags removed.
 */
function cleanSubtitleText(text: string): string {
  return (
    text
      // 1. Whole override block; only blocks that start with a backslash tag
      //    are removed so ordinary "{...}" text is left alone.
      .replace(/\{\\[^{}]*\}/g, "")
      // 2. Run of backslash tags (optionally with parenthesised arguments)
      //    terminated by "}" where the opening "{" is missing.
      .replace(/(?:\\[^\\{}\s(]*(?:\([^)]*\))?)+\}/g, "")
      // 3. Line-break / hard-space escapes. No override tag name starts with
      //    N, n or h, so this cannot split a real tag.
      .replace(/\\[Nnh]/g, " ")
      // 4. Stray tags outside braces. Whitespace is deliberately not part of
      //    the match so the dialogue following a tag is never swallowed.
      .replace(/\\[0-9]?[a-zA-Z]+\d*(?:\([^)]*\))?/g, "")
  );
}
"); + // Clean the subtitle text before sanitizing it + const cleanedText = cleanSubtitleText(textWithNewlines); + // https://www.w3.org/TR/webvtt1/#dom-construction-rules // added a
for newlines - const html = sanitize(textWithNewlines, { + const html = sanitize(cleanedText, { ALLOWED_TAGS: ["c", "b", "i", "u", "span", "ruby", "rt", "br"], ADD_TAGS: ["v", "lang"], ALLOWED_ATTR: ["title", "lang"],