improve subtitle scraping

This commit is contained in:
Pas 2025-06-09 14:06:50 -06:00
parent fb7632b30b
commit 06777a2580
2 changed files with 83 additions and 78 deletions

View file

@ -9,7 +9,6 @@ import { NotFoundError } from '@/utils/errors';
import { addOpenSubtitlesCaptions } from '@/utils/opensubtitles';
import { requiresProxy, setupProxy } from '@/utils/proxy';
import { isValidStream, validatePlayableStreams } from '@/utils/valid';
import { addWyzieCaptions } from '@/utils/wyziesubs';
export type IndividualSourceRunnerOptions = {
features: FeatureMap;
@ -95,36 +94,15 @@ export async function scrapeInvidualSource(
// opensubtitles
if (!ops.disableOpensubtitles) {
for (const playableStream of playableStreams) {
// Try Wyzie subs first
if (ops.media.imdbId) {
playableStream.captions = await addWyzieCaptions(
playableStream.captions,
ops.media.tmdbId,
ops.media.imdbId,
ops.media.type === 'show' ? ops.media.season.number : undefined,
ops.media.type === 'show' ? ops.media.episode.number : undefined,
);
// Fall back to OpenSubtitles if no Wyzie subs found
if (!playableStream.captions.some((caption) => caption.wyziesubs)) {
const [imdbId, season, episode] = atob(ops.media.imdbId)
.split('.')
.map((x, i) => (i === 0 ? x : Number(x) || null));
const mediaInfo = {
...ops,
media: {
type: season && episode ? 'show' : 'movie',
imdbId: imdbId?.toString() || '',
...(season && episode ? { season: { number: season }, episode: { number: episode } } : {}),
} as ScrapeMedia,
};
playableStream.captions = await addOpenSubtitlesCaptions(
playableStream.captions,
mediaInfo,
ops.media.imdbId,
);
}
}
playableStream.captions = await addOpenSubtitlesCaptions(
playableStream.captions,
ops,
btoa(
`${ops.media.imdbId}${
ops.media.type === 'show' ? `.${ops.media.season.number}.${ops.media.episode.number}` : ''
}`,
),
);
}
}
output.stream = playableStreams;

View file

@ -1,3 +1,4 @@
/* eslint-disable no-console */
import { ScrapeMedia } from '@/entrypoint/utils/media';
import { Caption, labelToLanguageCode, removeDuplicatedLanguages } from '@/providers/captions';
import { IndividualEmbedRunnerOptions } from '@/runners/individualRunner';
@ -9,6 +10,14 @@ type CaptionOptions = (ProviderRunnerOptions | IndividualEmbedRunnerOptions) & {
media?: ScrapeMedia;
};
/**
 * Builds a promise that resolves to `null` once `ms` milliseconds have passed,
 * intended to be raced against a captions request so a slow provider cannot
 * stall the caller indefinitely.
 *
 * The promise never rejects. When the delay elapses it logs an error naming
 * `source` and then resolves with `null`.
 *
 * Note: the underlying timer is not cancellable from the outside, so the
 * "timed out" message is logged after `ms` milliseconds even when the promise
 * it was raced against has already settled.
 *
 * @param ms - Delay in milliseconds before the promise resolves.
 * @param source - Human-readable provider name used as the log message prefix.
 * @returns A promise that resolves to `null` after the delay.
 */
const timeout = (ms: number, source: string): Promise<null> =>
  new Promise((done) => {
    const onExpired = (): void => {
      console.error(`${source} captions request timed out after ${ms}ms`);
      done(null);
    };

    setTimeout(onExpired, ms);
  });
export async function addOpenSubtitlesCaptions(
captions: Caption[],
ops: CaptionOptions,
@ -20,59 +29,77 @@ export async function addOpenSubtitlesCaptions(
.map((x, i) => (i === 0 ? x : Number(x) || null));
if (!imdbId) return captions;
// First try Wyzie subs
const wyzieCaptions = await addWyzieCaptions(
[],
ops.media?.tmdbId?.toString() || '',
imdbId.toString(),
typeof season === 'number' ? season : undefined,
typeof episode === 'number' ? episode : undefined,
);
// Try Wyzie subs first. 2 second timeout
try {
const wyziePromise = addWyzieCaptions(
[],
ops.media?.tmdbId?.toString() || '',
imdbId.toString(),
typeof season === 'number' ? season : undefined,
typeof episode === 'number' ? episode : undefined,
);
// If we found Wyzie subs, return them as OpenSubtitles captions
if (wyzieCaptions.length > 0) {
return [
...captions,
...wyzieCaptions.map((caption) => ({
...caption,
opensubtitles: true,
})),
];
const wyzieCaptions = await Promise.race([wyziePromise, timeout(2000, 'Wyzie')]);
// If we found Wyzie subs, return them as OpenSubtitles captions
if (wyzieCaptions && wyzieCaptions.length > 0) {
return [
...captions,
...wyzieCaptions.map((caption) => ({
...caption,
opensubtitles: true,
})),
];
}
} catch (error) {
// Wyzie failed for a reason other than timeout
console.error('Wyzie subtitles fetch failed:', error);
}
// Fall back to OpenSubtitles if no Wyzie subs found
const Res: {
LanguageName: string;
SubDownloadLink: string;
SubFormat: 'srt' | 'vtt';
}[] = await ops.proxiedFetcher(
`https://rest.opensubtitles.org/search/${
season && episode ? `episode-${episode}/` : ''
}imdbid-${(imdbId as string).slice(2)}${season && episode ? `/season-${season}` : ''}`,
{
headers: {
'X-User-Agent': 'VLSub 0.10.2',
// Fall back to OpenSubtitles with a 5 second timeout
try {
const openSubsPromise = ops.proxiedFetcher(
`https://rest.opensubtitles.org/search/${
season && episode ? `episode-${episode}/` : ''
}imdbid-${(imdbId as string).slice(2)}${season && episode ? `/season-${season}` : ''}`,
{
headers: {
'X-User-Agent': 'VLSub 0.10.2',
},
},
},
);
);
const openSubtilesCaptions: Caption[] = [];
for (const caption of Res) {
const url = caption.SubDownloadLink.replace('.gz', '').replace('download/', 'download/subencoding-utf8/');
const language = labelToLanguageCode(caption.LanguageName);
if (!url || !language) continue;
else
openSubtilesCaptions.push({
id: url,
opensubtitles: true,
url,
type: caption.SubFormat || 'srt',
hasCorsRestrictions: false,
language,
});
const Res: {
LanguageName: string;
SubDownloadLink: string;
SubFormat: 'srt' | 'vtt';
}[] = await Promise.race([openSubsPromise, timeout(5000, 'OpenSubtitles')]);
if (!Res) return captions; // Timeout occurred
const openSubtilesCaptions: Caption[] = [];
for (const caption of Res) {
const url = caption.SubDownloadLink.replace('.gz', '').replace('download/', 'download/subencoding-utf8/');
const language = labelToLanguageCode(caption.LanguageName);
if (!url || !language) continue;
else
openSubtilesCaptions.push({
id: url,
opensubtitles: true,
url,
type: caption.SubFormat || 'srt',
hasCorsRestrictions: false,
language,
});
}
return [...captions, ...removeDuplicatedLanguages(openSubtilesCaptions)];
} catch (error) {
// OpenSubtitles failed for a reason other than timeout
console.error('OpenSubtitles fetch failed:', error);
return captions;
}
return [...captions, ...removeDuplicatedLanguages(openSubtilesCaptions)];
} catch {
} catch (error) {
console.error('Error in addOpenSubtitlesCaptions:', error);
return captions;
}
}