improve subtitle scraping

This commit is contained in:
Pas 2025-06-09 14:06:50 -06:00
parent fb7632b30b
commit 06777a2580
2 changed files with 83 additions and 78 deletions

View file

@ -9,7 +9,6 @@ import { NotFoundError } from '@/utils/errors';
import { addOpenSubtitlesCaptions } from '@/utils/opensubtitles'; import { addOpenSubtitlesCaptions } from '@/utils/opensubtitles';
import { requiresProxy, setupProxy } from '@/utils/proxy'; import { requiresProxy, setupProxy } from '@/utils/proxy';
import { isValidStream, validatePlayableStreams } from '@/utils/valid'; import { isValidStream, validatePlayableStreams } from '@/utils/valid';
import { addWyzieCaptions } from '@/utils/wyziesubs';
export type IndividualSourceRunnerOptions = { export type IndividualSourceRunnerOptions = {
features: FeatureMap; features: FeatureMap;
@ -95,36 +94,15 @@ export async function scrapeInvidualSource(
// opensubtitles // opensubtitles
if (!ops.disableOpensubtitles) { if (!ops.disableOpensubtitles) {
for (const playableStream of playableStreams) { for (const playableStream of playableStreams) {
// Try Wyzie subs first playableStream.captions = await addOpenSubtitlesCaptions(
if (ops.media.imdbId) { playableStream.captions,
playableStream.captions = await addWyzieCaptions( ops,
playableStream.captions, btoa(
ops.media.tmdbId, `${ops.media.imdbId}${
ops.media.imdbId, ops.media.type === 'show' ? `.${ops.media.season.number}.${ops.media.episode.number}` : ''
ops.media.type === 'show' ? ops.media.season.number : undefined, }`,
ops.media.type === 'show' ? ops.media.episode.number : undefined, ),
); );
// Fall back to OpenSubtitles if no Wyzie subs found
if (!playableStream.captions.some((caption) => caption.wyziesubs)) {
const [imdbId, season, episode] = atob(ops.media.imdbId)
.split('.')
.map((x, i) => (i === 0 ? x : Number(x) || null));
const mediaInfo = {
...ops,
media: {
type: season && episode ? 'show' : 'movie',
imdbId: imdbId?.toString() || '',
...(season && episode ? { season: { number: season }, episode: { number: episode } } : {}),
} as ScrapeMedia,
};
playableStream.captions = await addOpenSubtitlesCaptions(
playableStream.captions,
mediaInfo,
ops.media.imdbId,
);
}
}
} }
} }
output.stream = playableStreams; output.stream = playableStreams;

View file

@ -1,3 +1,4 @@
/* eslint-disable no-console */
import { ScrapeMedia } from '@/entrypoint/utils/media'; import { ScrapeMedia } from '@/entrypoint/utils/media';
import { Caption, labelToLanguageCode, removeDuplicatedLanguages } from '@/providers/captions'; import { Caption, labelToLanguageCode, removeDuplicatedLanguages } from '@/providers/captions';
import { IndividualEmbedRunnerOptions } from '@/runners/individualRunner'; import { IndividualEmbedRunnerOptions } from '@/runners/individualRunner';
@ -9,6 +10,14 @@ type CaptionOptions = (ProviderRunnerOptions | IndividualEmbedRunnerOptions) & {
media?: ScrapeMedia; media?: ScrapeMedia;
}; };
/**
 * Creates a promise that settles with `null` once `ms` milliseconds have passed.
 *
 * When the delay elapses, a message naming `source` is written via
 * `console.error` and the promise resolves (it never rejects).
 *
 * NOTE: the timer is never cleared, so the message is logged after `ms`
 * milliseconds regardless of what else has settled in the meantime.
 *
 * @param ms - Delay in milliseconds before the promise resolves.
 * @param source - Label interpolated into the logged timeout message.
 * @returns A promise that resolves to `null` after the delay.
 */
const timeout = (ms: number, source: string) => {
  // Logs the expiry message, then settles the pending promise with null.
  const expire = (settle: (value: null) => void): void => {
    console.error(`${source} captions request timed out after ${ms}ms`);
    settle(null);
  };

  return new Promise<null>((resolve) => {
    setTimeout(() => expire(resolve), ms);
  });
};
export async function addOpenSubtitlesCaptions( export async function addOpenSubtitlesCaptions(
captions: Caption[], captions: Caption[],
ops: CaptionOptions, ops: CaptionOptions,
@ -20,59 +29,77 @@ export async function addOpenSubtitlesCaptions(
.map((x, i) => (i === 0 ? x : Number(x) || null)); .map((x, i) => (i === 0 ? x : Number(x) || null));
if (!imdbId) return captions; if (!imdbId) return captions;
// First try Wyzie subs // Try Wyzie subs first. 2 second timeout
const wyzieCaptions = await addWyzieCaptions( try {
[], const wyziePromise = addWyzieCaptions(
ops.media?.tmdbId?.toString() || '', [],
imdbId.toString(), ops.media?.tmdbId?.toString() || '',
typeof season === 'number' ? season : undefined, imdbId.toString(),
typeof episode === 'number' ? episode : undefined, typeof season === 'number' ? season : undefined,
); typeof episode === 'number' ? episode : undefined,
);
// If we found Wyzie subs, return them as OpenSubtitles captions const wyzieCaptions = await Promise.race([wyziePromise, timeout(2000, 'Wyzie')]);
if (wyzieCaptions.length > 0) {
return [ // If we found Wyzie subs, return them as OpenSubtitles captions
...captions, if (wyzieCaptions && wyzieCaptions.length > 0) {
...wyzieCaptions.map((caption) => ({ return [
...caption, ...captions,
opensubtitles: true, ...wyzieCaptions.map((caption) => ({
})), ...caption,
]; opensubtitles: true,
})),
];
}
} catch (error) {
// Wyzie failed for a reason other than timeout
console.error('Wyzie subtitles fetch failed:', error);
} }
// Fall back to OpenSubtitles if no Wyzie subs found // Fall back to OpenSubtitles with a 5 second timeout
const Res: { try {
LanguageName: string; const openSubsPromise = ops.proxiedFetcher(
SubDownloadLink: string; `https://rest.opensubtitles.org/search/${
SubFormat: 'srt' | 'vtt'; season && episode ? `episode-${episode}/` : ''
}[] = await ops.proxiedFetcher( }imdbid-${(imdbId as string).slice(2)}${season && episode ? `/season-${season}` : ''}`,
`https://rest.opensubtitles.org/search/${ {
season && episode ? `episode-${episode}/` : '' headers: {
}imdbid-${(imdbId as string).slice(2)}${season && episode ? `/season-${season}` : ''}`, 'X-User-Agent': 'VLSub 0.10.2',
{ },
headers: {
'X-User-Agent': 'VLSub 0.10.2',
}, },
}, );
);
const openSubtilesCaptions: Caption[] = []; const Res: {
for (const caption of Res) { LanguageName: string;
const url = caption.SubDownloadLink.replace('.gz', '').replace('download/', 'download/subencoding-utf8/'); SubDownloadLink: string;
const language = labelToLanguageCode(caption.LanguageName); SubFormat: 'srt' | 'vtt';
if (!url || !language) continue; }[] = await Promise.race([openSubsPromise, timeout(5000, 'OpenSubtitles')]);
else
openSubtilesCaptions.push({ if (!Res) return captions; // Timeout occurred
id: url,
opensubtitles: true, const openSubtilesCaptions: Caption[] = [];
url, for (const caption of Res) {
type: caption.SubFormat || 'srt', const url = caption.SubDownloadLink.replace('.gz', '').replace('download/', 'download/subencoding-utf8/');
hasCorsRestrictions: false, const language = labelToLanguageCode(caption.LanguageName);
language, if (!url || !language) continue;
}); else
openSubtilesCaptions.push({
id: url,
opensubtitles: true,
url,
type: caption.SubFormat || 'srt',
hasCorsRestrictions: false,
language,
});
}
return [...captions, ...removeDuplicatedLanguages(openSubtilesCaptions)];
} catch (error) {
// OpenSubtitles failed for a reason other than timeout
console.error('OpenSubtitles fetch failed:', error);
return captions;
} }
return [...captions, ...removeDuplicatedLanguages(openSubtilesCaptions)]; } catch (error) {
} catch { console.error('Error in addOpenSubtitlesCaptions:', error);
return captions; return captions;
} }
} }