From f024f91c5131af4e2d6bc2369d0475056dbe6830 Mon Sep 17 00:00:00 2001 From: Pas <74743263+Pasithea0@users.noreply.github.com> Date: Sat, 3 May 2025 14:08:40 -0600 Subject: [PATCH] scrape imdb with languages --- src/components/overlays/DetailsModal.tsx | 1 - src/utils/imdbScraper.ts | 53 +++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/components/overlays/DetailsModal.tsx b/src/components/overlays/DetailsModal.tsx index a1876f93..8ea923ea 100644 --- a/src/components/overlays/DetailsModal.tsx +++ b/src/components/overlays/DetailsModal.tsx @@ -358,7 +358,6 @@ function DetailsContent({ type="button" onClick={togglePlay} className="absolute inset-0 flex items-center justify-center z-10" - title="Play" > diff --git a/src/utils/imdbScraper.ts b/src/utils/imdbScraper.ts index 8d1070e6..bebb7b08 100644 --- a/src/utils/imdbScraper.ts +++ b/src/utils/imdbScraper.ts @@ -3,6 +3,41 @@ import { isExtensionActive } from "@/backend/extension/messaging"; import { proxiedFetch } from "@/backend/helpers/fetch"; import { makeExtensionFetcher } from "@/backend/providers/fetchers"; import { useAuthStore } from "@/stores/auth"; +import { useLanguageStore } from "@/stores/language"; + +import { getTmdbLanguageCode } from "./language"; + +// IMDb language code mapping (differs from TMDB format) +// Map from ISO language code to IMDb language parameter +const imdbLanguageMap: Record = { + "en-US": "en-US", + "es-ES": "es-ES", + "fr-FR": "fr-FR", + "de-DE": "de-DE", + "it-IT": "it-IT", + "pt-PT": "pt-PT", + "ru-RU": "ru-RU", + "ja-JP": "ja-JP", + "zh-CN": "zh-CN", + "ko-KR": "ko-KR", + "ar-SA": "ar-SA", + "hi-IN": "hi-IN", + "el-GR": "el-GR", + // Add more mappings as needed +}; + +/** + * Convert a TMDB-style language code to an IMDb language code + * @param language TMDB-style language code (e.g., "en-US") + * @returns IMDb language code or default "en-US" + */ +function getImdbLanguageCode(language: string): string { + // If we have a direct mapping, use it + if (imdbLanguageMap[language]) return imdbLanguageMap[language]; + + // Otherwise default to English + return "en-US"; +} interface IMDbMetadata { title: string; @@ -79,6 +114,7 @@ export async function scrapeIMDb( imdbId: string, season?: number, episode?: number, + language?: string, ): Promise { // Check if we have a proxy or extension const hasExtension = await isExtensionActive(); @@ -95,12 +131,25 @@ export async function scrapeIMDb( `[IMDb Scraper] Using ${hasExtension ? "browser extension" : "custom proxy"} for requests`, ); - // Construct IMDb URL + // Get user language if not provided + if (!language) { + const userLanguage = useLanguageStore.getState().language; + language = getTmdbLanguageCode(userLanguage); + } + + // Get IMDb language format + const imdbLanguage = getImdbLanguageCode(language); + + // Construct IMDb URL with language parameter let imdbUrl = `https://www.imdb.com/title/${imdbId}/`; if (season && episode) { imdbUrl += `episodes?season=${season}`; } + // Add language parameter to URL + const separator = imdbUrl.includes("?") ? "&" : "?"; + imdbUrl += `${separator}locale=${imdbLanguage}`; + // Add random delay to avoid rate limiting const delay = Math.floor(Math.random() * (197 - 69) + 69); await new Promise((resolve) => { @@ -114,6 +163,7 @@ export async function scrapeIMDb( const result = await extensionFetcher(imdbUrl, { headers: { "User-Agent": getRandomUserAgent(), + "Accept-Language": imdbLanguage, }, method: "GET", query: {}, @@ -124,6 +174,7 @@ export async function scrapeIMDb( response = await proxiedFetch(imdbUrl, { headers: { "User-Agent": getRandomUserAgent(), + "Accept-Language": imdbLanguage, }, }); }