mirror of
https://github.com/p-stream/p-stream.git
synced 2026-03-11 17:55:33 +00:00
293 lines
8.8 KiB
TypeScript
293 lines
8.8 KiB
TypeScript
/* eslint-disable no-console */
|
|
import { isExtensionActive } from "@/backend/extension/messaging";
|
|
import { proxiedFetch } from "@/backend/helpers/fetch";
|
|
import { makeExtensionFetcher } from "@/backend/providers/fetchers";
|
|
import { useAuthStore } from "@/stores/auth";
|
|
import { useLanguageStore } from "@/stores/language";
|
|
|
|
import { getTmdbLanguageCode } from "./language";
|
|
|
|
// IMDb language code mapping (differs from TMDB format)
|
|
// Map from ISO language code to IMDb language parameter
|
|
const imdbLanguageMap: Record<string, string> = {
|
|
"en-US": "en-US",
|
|
"es-ES": "es-ES",
|
|
"fr-FR": "fr-FR",
|
|
"de-DE": "de-DE",
|
|
"it-IT": "it-IT",
|
|
"pt-PT": "pt-PT",
|
|
"ru-RU": "ru-RU",
|
|
"ja-JP": "ja-JP",
|
|
"zh-CN": "zh-CN",
|
|
"ko-KR": "ko-KR",
|
|
"ar-SA": "ar-SA",
|
|
"hi-IN": "hi-IN",
|
|
"el-GR": "el-GR",
|
|
// Add more mappings as needed
|
|
};
|
|
|
|
/**
|
|
* Convert a TMDB-style language code to an IMDb language code
|
|
* @param language TMDB-style language code (e.g., "en-US")
|
|
* @returns IMDb language code or default "en-US"
|
|
*/
|
|
function getImdbLanguageCode(language: string): string {
|
|
// If we have a direct mapping, use it
|
|
if (imdbLanguageMap[language]) return imdbLanguageMap[language];
|
|
|
|
// Otherwise default to English
|
|
return "en-US";
|
|
}
|
|
|
|
interface IMDbMetadata {
|
|
title?: string;
|
|
original_title?: string;
|
|
title_type?: string;
|
|
year?: number | null;
|
|
end_year?: number | null;
|
|
day?: number | null;
|
|
month?: number | null;
|
|
date?: string;
|
|
runtime?: number | null;
|
|
age_rating?: string;
|
|
imdb_rating?: number | null;
|
|
votes?: number | null;
|
|
plot?: string;
|
|
poster_url?: string;
|
|
trailer_url?: string;
|
|
trailer_thumbnail?: string;
|
|
url?: string;
|
|
genre?: string[];
|
|
cast?: string[];
|
|
directors?: string[];
|
|
writers?: string[];
|
|
keywords?: string[];
|
|
countries?: string[];
|
|
languages?: string[];
|
|
locations?: string[];
|
|
season?: number;
|
|
episode?: number;
|
|
episode_title?: string;
|
|
episode_plot?: string;
|
|
episode_rating?: number;
|
|
episode_votes?: number;
|
|
}
|
|
|
|
const months = [
|
|
"January",
|
|
"February",
|
|
"March",
|
|
"April",
|
|
"May",
|
|
"June",
|
|
"July",
|
|
"August",
|
|
"September",
|
|
"October",
|
|
"November",
|
|
"December",
|
|
];
|
|
|
|
const userAgents = [
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
|
|
];
|
|
|
|
function getRandomUserAgent(): string {
|
|
return userAgents[Math.floor(Math.random() * userAgents.length)];
|
|
}
|
|
|
|
export async function scrapeIMDb(
|
|
imdbId: string,
|
|
season?: number,
|
|
episode?: number,
|
|
language?: string,
|
|
type?: "movie" | "show",
|
|
): Promise<IMDbMetadata> {
|
|
// Check if we have a proxy or extension
|
|
const hasExtension = await isExtensionActive();
|
|
const hasProxy = Boolean(useAuthStore.getState().proxySet);
|
|
|
|
if (!hasExtension && !hasProxy) {
|
|
// Custom API for trailers:
|
|
const trailerResponse = await fetch(
|
|
`https://fed-trailers.pstream.mov/${type === "movie" ? "movie" : "tv"}/${imdbId}`,
|
|
).then((res) => res.json());
|
|
if (trailerResponse.trailer?.embed_url) {
|
|
return {
|
|
trailer_url: trailerResponse.trailer.embed_url,
|
|
};
|
|
}
|
|
// END CUSTOM API
|
|
throw new Error(
|
|
"IMDb scraping requires either the browser extension or a custom proxy to be set up. " +
|
|
"Please install the extension or set up a proxy in the settings.",
|
|
);
|
|
}
|
|
|
|
console.log(
|
|
`[IMDb Scraper] Using ${hasExtension ? "browser extension" : "custom proxy"} for requests`,
|
|
);
|
|
|
|
// Get user language if not provided
|
|
if (!language) {
|
|
const userLanguage = useLanguageStore.getState().language;
|
|
language = getTmdbLanguageCode(userLanguage);
|
|
}
|
|
|
|
// Get IMDb language format
|
|
const imdbLanguage = getImdbLanguageCode(language);
|
|
|
|
// Construct IMDb URL with language parameter
|
|
let imdbUrl = `https://www.imdb.com/title/${imdbId}/`;
|
|
if (season && episode) {
|
|
imdbUrl += `episodes?season=${season}`;
|
|
}
|
|
|
|
// Add language parameter to URL
|
|
const separator = imdbUrl.includes("?") ? "&" : "?";
|
|
imdbUrl += `${separator}locale=${imdbLanguage}`;
|
|
|
|
// Add random delay to avoid rate limiting
|
|
const delay = Math.floor(Math.random() * (197 - 69) + 69);
|
|
await new Promise<void>((resolve) => {
|
|
setTimeout(resolve, delay);
|
|
});
|
|
|
|
// Fetch IMDb page using appropriate fetcher
|
|
let response: string;
|
|
if (hasExtension) {
|
|
const extensionFetcher = makeExtensionFetcher();
|
|
const result = await extensionFetcher(imdbUrl, {
|
|
headers: {
|
|
"User-Agent": getRandomUserAgent(),
|
|
"Accept-Language": imdbLanguage,
|
|
},
|
|
method: "GET",
|
|
query: {},
|
|
readHeaders: [],
|
|
});
|
|
response = result.body as string;
|
|
} else {
|
|
response = await proxiedFetch<string>(imdbUrl, {
|
|
headers: {
|
|
"User-Agent": getRandomUserAgent(),
|
|
"Accept-Language": imdbLanguage,
|
|
},
|
|
});
|
|
}
|
|
|
|
// Extract JSON data from the page
|
|
const jsonMatch = response.match(
|
|
/<script id="__NEXT_DATA__" type="application\/json">(.*?)<\/script>/,
|
|
);
|
|
if (!jsonMatch) {
|
|
throw new Error("Could not find IMDb data on the page");
|
|
}
|
|
|
|
const data = JSON.parse(jsonMatch[1]);
|
|
const metadata: IMDbMetadata = {
|
|
title: "",
|
|
original_title: "",
|
|
title_type: "",
|
|
year: null,
|
|
end_year: null,
|
|
day: null,
|
|
month: null,
|
|
date: "",
|
|
runtime: null,
|
|
age_rating: "",
|
|
imdb_rating: null,
|
|
votes: null,
|
|
plot: "",
|
|
poster_url: "",
|
|
trailer_url: "",
|
|
url: imdbUrl,
|
|
genre: [],
|
|
cast: [],
|
|
directors: [],
|
|
writers: [],
|
|
keywords: [],
|
|
countries: [],
|
|
languages: [],
|
|
locations: [],
|
|
season,
|
|
episode,
|
|
};
|
|
|
|
try {
|
|
// Extract all the metadata
|
|
const aboveTheFold = data.props.pageProps.aboveTheFoldData;
|
|
const mainColumn = data.props.pageProps.mainColumnData;
|
|
|
|
metadata.title = aboveTheFold.titleText?.text || "";
|
|
metadata.original_title = aboveTheFold.originalTitleText?.text || "";
|
|
metadata.title_type = aboveTheFold.titleType?.text || "";
|
|
metadata.age_rating = aboveTheFold.certificate?.rating || "";
|
|
metadata.year = aboveTheFold.releaseYear?.year || null;
|
|
metadata.end_year = aboveTheFold.releaseYear?.endYear || null;
|
|
metadata.day = aboveTheFold.releaseDate?.day || null;
|
|
metadata.month = aboveTheFold.releaseDate?.month || null;
|
|
|
|
if (metadata.month && metadata.day && metadata.year) {
|
|
metadata.date = `${months[metadata.month - 1]} ${metadata.day}, ${metadata.year}`;
|
|
}
|
|
|
|
metadata.runtime = aboveTheFold.runtime?.seconds || null;
|
|
metadata.plot = aboveTheFold.plot?.plotText?.plainText || "";
|
|
metadata.imdb_rating = aboveTheFold.ratingsSummary?.aggregateRating || null;
|
|
metadata.votes = aboveTheFold.ratingsSummary?.voteCount || null;
|
|
metadata.poster_url = aboveTheFold.primaryImage?.url || "";
|
|
const trailerNode = aboveTheFold.primaryVideos?.edges?.[0]?.node;
|
|
metadata.trailer_url = trailerNode?.playbackURLs?.[0]?.url || "";
|
|
metadata.trailer_thumbnail = trailerNode?.thumbnail?.url || "";
|
|
|
|
// Extract arrays
|
|
metadata.genre = aboveTheFold.genres?.genres?.map((g: any) => g.text) || [];
|
|
metadata.cast =
|
|
aboveTheFold.castPageTitle?.edges?.map(
|
|
(e: any) => e.node.name.nameText.text,
|
|
) || [];
|
|
metadata.directors =
|
|
aboveTheFold.directorsPageTitle?.[0]?.credits?.map(
|
|
(c: any) => c.name.nameText.text,
|
|
) || [];
|
|
metadata.writers =
|
|
mainColumn.writers?.[0]?.credits?.map((c: any) => c.name.nameText.text) ||
|
|
[];
|
|
metadata.keywords =
|
|
aboveTheFold.keywords?.edges?.map((e: any) => e.node.text) || [];
|
|
metadata.countries =
|
|
mainColumn.countriesOfOrigin?.countries?.map((c: any) => c.text) || [];
|
|
metadata.languages =
|
|
mainColumn.spokenLanguages?.spokenLanguages?.map((l: any) => l.text) ||
|
|
[];
|
|
metadata.locations =
|
|
mainColumn.filmingLocations?.edges?.map((e: any) => e.node.text) || [];
|
|
|
|
// If season and episode are provided, get episode-specific data
|
|
if (season && episode) {
|
|
const episodeData =
|
|
data.props.pageProps.mainColumnData.episodes?.edges?.find(
|
|
(e: any) => e.node.episodeNumber === episode,
|
|
);
|
|
|
|
if (episodeData) {
|
|
metadata.episode_title = episodeData.node.titleText?.text || "";
|
|
metadata.episode_plot =
|
|
episodeData.node.plot?.plotText?.plainText || "";
|
|
metadata.episode_rating =
|
|
episodeData.node.ratingsSummary?.aggregateRating || null;
|
|
metadata.episode_votes =
|
|
episodeData.node.ratingsSummary?.voteCount || null;
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.error("Error parsing IMDb data:", error);
|
|
throw error;
|
|
}
|
|
|
|
return metadata;
|
|
}
|