mirror of
https://github.com/p-stream/providers.git
synced 2026-01-11 20:10:33 +00:00
update docs
This commit is contained in:
parent
42e4a1113e
commit
3be5bdac6f
34 changed files with 7844 additions and 5364 deletions
|
|
@ -1,9 +1,9 @@
|
|||
export default defineAppConfig({
|
||||
docus: {
|
||||
title: '@movie-web/providers',
|
||||
title: '@p-stream/providers',
|
||||
description: 'For all your media scraping needs',
|
||||
socials: {
|
||||
github: 'movie-web/providers',
|
||||
github: 'p-stream/providers',
|
||||
},
|
||||
image: '',
|
||||
aside: {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
title: "@movie-web/providers | For all your media scraping needs"
|
||||
title: "@p-stream/providers | For all your media scraping needs"
|
||||
navigation: false
|
||||
layout: page
|
||||
---
|
||||
|
|
@ -11,12 +11,12 @@ cta:
|
|||
- /get-started/introduction
|
||||
secondary:
|
||||
- Open on GitHub →
|
||||
- https://github.com/movie-web/providers
|
||||
snippet: npm i @movie-web/providers
|
||||
- https://github.com/p-stream/providers
|
||||
snippet: npm i @p-stream/providers
|
||||
---
|
||||
|
||||
#title
|
||||
@movie-web/providers
|
||||
@p-stream/providers
|
||||
|
||||
#description
|
||||
Easily scrape all sorts of media sites for content
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
# Introduction
|
||||
|
||||
## What is `@movie-web/providers`?
|
||||
## What is `@p-stream/providers`?
|
||||
|
||||
`@movie-web/providers` is the soul of [movie-web](https://github.com/movie-web/movie-web). It's a collection of scrapers of various streaming sites. It extracts the raw streams from those sites, so you can watch them without any extra fluff from the original sites.
|
||||
`@p-stream/providers` is the soul of [p-stream](https://github.com/p-stream/p-stream). It's a collection of scrapers of various streaming sites. It extracts the raw streams from those sites, so you can watch them without any extra fluff from the original sites.
|
||||
|
||||
## What can I use this on?
|
||||
|
||||
|
|
|
|||
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
## Installation
|
||||
|
||||
Let's get started with `@movie-web/providers`. First lets install the package.
|
||||
Let's get started with `@p-stream/providers`. First, let's install the package.
|
||||
|
||||
::code-group
|
||||
```bash [NPM]
|
||||
npm install @movie-web/providers
|
||||
npm install @p-stream/providers
|
||||
```
|
||||
```bash [Yarn]
|
||||
yarn add @movie-web/providers
|
||||
yarn add @p-stream/providers
|
||||
```
|
||||
```bash [PNPM]
|
||||
pnpm install @movie-web/providers
|
||||
pnpm install @p-stream/providers
|
||||
```
|
||||
::
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ This snippet will only work on a **server**. For other environments, check out [
|
|||
::
|
||||
|
||||
```ts [index.ts (server)]
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@movie-web/providers';
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@p-stream/providers';
|
||||
|
||||
// this is how the library will make http requests
|
||||
const myFetcher = makeStandardFetcher(fetch);
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@ To make use of the examples below, check out the following pages:
|
|||
|
||||
## NodeJs server
|
||||
```ts
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@movie-web/providers';
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@p-stream/providers';
|
||||
|
||||
const providers = makeProviders({
|
||||
fetcher: makeStandardFetcher(fetch),
|
||||
target: chooseYourself, // check out https://movie-web.github.io/providers/essentials/targets
|
||||
target: chooseYourself, // check out https://p-stream.github.io/providers/essentials/targets
|
||||
})
|
||||
```
|
||||
|
||||
|
|
@ -27,7 +27,7 @@ Using the provider package client-side requires a hosted version of simple-proxy
|
|||
Read more [about proxy fetchers](./2.fetchers.md#using-fetchers-on-the-browser).
|
||||
|
||||
```ts
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@movie-web/providers';
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@p-stream/providers';
|
||||
|
||||
const proxyUrl = "https://your.proxy.workers.dev/";
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ And follow the [react-native-quick-crypto documentation](https://github.com/marg
|
|||
3. Then you can use the library like this:
|
||||
|
||||
```ts
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@movie-web/providers';
|
||||
import { makeProviders, makeStandardFetcher, targets } from '@p-stream/providers';
|
||||
|
||||
const providers = makeProviders({
|
||||
fetcher: makeStandardFetcher(fetch),
|
||||
|
|
|
|||
|
|
@ -9,6 +9,6 @@ A target is the device on which the stream will be played.
|
|||
|
||||
#### Possible targets
|
||||
- **`targets.BROWSER`** Stream will be played in a browser with CORS
|
||||
- **`targets.BROWSER_EXTENSION`** Stream will be played in a browser using the movie-web extension (WIP)
|
||||
- **`targets.BROWSER_EXTENSION`** Stream will be played in a browser using the p-stream extension (WIP)
|
||||
- **`targets.NATIVE`** Stream will be played on a native video player
|
||||
- **`targets.ANY`** No restrictions for selecting streams, will just give all of them
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ const fetcher = makeStandardFetcher(fetch);
|
|||
## Using fetchers on the browser
|
||||
When using this library on a browser, you will need a proxy. Browsers restrict when a web request can be made. To bypass those restrictions, you will need a CORS proxy.
|
||||
|
||||
The movie-web team has a proxy pre-made and pre-configured for you to use. For more information, check out [movie-web/simple-proxy](https://github.com/movie-web/simple-proxy). After installing, you can use this proxy like so:
|
||||
The p-stream team has a proxy pre-made and pre-configured for you to use. For more information, check out [p-stream/simple-proxy](https://github.com/p-stream/simple-proxy). After installing, you can use this proxy like so:
|
||||
|
||||
```ts
|
||||
const fetcher = makeSimpleProxyFetcher("https://your.proxy.workers.dev/", fetch);
|
||||
|
|
@ -57,7 +57,7 @@ This is the list of features it needs:
|
|||
- Send JSON, Formdata or normal strings
|
||||
- get final destination URL
|
||||
|
||||
It's not recommended to do this at all. If you have to, you can base your code on the original implementation of `makeStandardFetcher`. Check out the [source code for it here](https://github.com/movie-web/providers/blob/dev/src/fetchers/standardFetch.ts).
|
||||
It's not recommended to do this at all. If you have to, you can base your code on the original implementation of `makeStandardFetcher`. Check out the [source code for it here](https://github.com/p-stream/providers/blob/dev/src/fetchers/standardFetch.ts).
|
||||
|
||||
Here is a basic template on how to make your own custom fetcher:
|
||||
|
||||
|
|
|
|||
13
.docs/content/3.in-depth/0.development.md
Normal file
13
.docs/content/3.in-depth/0.development.md
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Development Guide
|
||||
|
||||
This guide covers everything you need to start contributing.
|
||||
|
||||
## Get Started
|
||||
- **[Setup and Prerequisites](/in-depth/setup-and-prerequisites)** - Start here!
|
||||
|
||||
## In-Depth Guides
|
||||
|
||||
- **[Provider System](/in-depth/provider-system)** - How sources, embeds, and ranking work
|
||||
- **[Building Scrapers](/in-depth/building-scrapers)** - Complete guide to creating scrapers
|
||||
- **[Flags System](/in-depth/flags)** - Target compatibility and stream properties
|
||||
- **[Advanced Concepts](/in-depth/advanced-concepts)** - Error handling, proxying, and best practices
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# Sources vs embeds
|
||||
|
||||
::alert{type="warning"}
|
||||
This page isn't quite done yet, stay tuned!
|
||||
::
|
||||
|
||||
<!--
|
||||
TODO
|
||||
- How do sources and embeds differ
|
||||
- How do sources and embeds interact
|
||||
-->
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
# New providers
|
||||
|
||||
::alert{type="warning"}
|
||||
This page isn't quite done yet, stay tuned!
|
||||
::
|
||||
|
||||
<!--
|
||||
TODO
|
||||
- How to make new sources or embeds
|
||||
- Ranking
|
||||
- Link to flags
|
||||
-->
|
||||
179
.docs/content/3.in-depth/1.setup-and-prerequisites.md
Normal file
179
.docs/content/3.in-depth/1.setup-and-prerequisites.md
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
# Setup and Prerequisites
|
||||
|
||||
Before you start building scrapers, you need to set up your development environment and understand the testing workflow.
|
||||
|
||||
## Environment Setup
|
||||
|
||||
### 1. Create Environment File
|
||||
|
||||
Create a `.env` file in the root of the repository with the following variables:
|
||||
|
||||
```env
|
||||
MOVIE_WEB_TMDB_API_KEY = "your_tmdb_api_key_here"
|
||||
MOVIE_WEB_PROXY_URL = "https://your-proxy-url.com" # Optional
|
||||
```
|
||||
|
||||
**Getting a TMDB API Key:**
|
||||
1. Create an account at [TheMovieDB](https://www.themoviedb.org/)
|
||||
2. Go to Settings > API
|
||||
3. Request an API key (choose "Developer" for free usage)
|
||||
4. Use the provided key in your `.env` file
|
||||
|
||||
**Proxy URL (Optional):**
|
||||
- Useful for testing scrapers that require proxy access
|
||||
- Can help bypass geographical restrictions during development
|
||||
- If not provided, the library will use default proxy services
|
||||
|
||||
### 2. Install Dependencies
|
||||
|
||||
Install all required dependencies:
|
||||
|
||||
```sh
|
||||
pnpm install
|
||||
```
|
||||
|
||||
## Familiarize Yourself with the CLI
|
||||
|
||||
The library provides a CLI tool that's essential for testing scrapers during development. Unit tests can't be made for scrapers due to their unreliable nature, so the CLI is your primary testing tool.
|
||||
|
||||
### Interactive Mode
|
||||
|
||||
The easiest way to test is using interactive mode:
|
||||
|
||||
```sh
|
||||
pnpm cli
|
||||
```
|
||||
|
||||
This will prompt you for:
|
||||
- **Fetcher mode** (native, node-fetch, browser)
|
||||
- **Scraper ID** (source or embed)
|
||||
- **TMDB ID** for the content (for sources)
|
||||
- **Embed URL** (for testing embeds directly)
|
||||
- **Season/episode numbers** (for TV shows)
|
||||
|
||||
### Command Line Mode
|
||||
|
||||
For repeatability and automation, you can specify arguments directly:
|
||||
|
||||
```sh
|
||||
# Get help with all available options
|
||||
pnpm cli --help
|
||||
|
||||
# Test a movie scraper
|
||||
pnpm cli --source-id catflix --tmdb-id 11527
|
||||
|
||||
# Test a TV show scraper (Arcane S1E1)
|
||||
pnpm cli --source-id zoechip --tmdb-id 94605 --season 1 --episode 1
|
||||
|
||||
# Test an embed scraper directly with a URL
|
||||
pnpm cli --source-id turbovid --url "https://turbovid.eu/embed/DjncbDBEmbLW"
|
||||
```
|
||||
|
||||
### Common CLI Examples
|
||||
|
||||
```sh
|
||||
# Popular test cases
|
||||
pnpm cli --source-id catflix --tmdb-id 11527 # The Shining
|
||||
pnpm cli --source-id embedsu --tmdb-id 129 # Spirited Away
|
||||
pnpm cli --source-id vidsrc --tmdb-id 94605 --season 1 --episode 1 # Arcane S1E1
|
||||
|
||||
# Testing different fetcher modes
|
||||
pnpm cli --fetcher native --source-id catflix --tmdb-id 11527
|
||||
pnpm cli --fetcher browser --source-id catflix --tmdb-id 11527
|
||||
```
|
||||
|
||||
### Fetcher Options
|
||||
|
||||
The CLI supports different fetcher modes:
|
||||
|
||||
- **`native`**: Uses Node.js built-in fetch (undici) - fastest
|
||||
- **`node-fetch`**: Uses the node-fetch library
|
||||
- **`browser`**: Starts headless Chrome for browser-like environment
|
||||
|
||||
::alert{type="warning"}
|
||||
The browser fetcher requires running `pnpm build` first, otherwise you'll get outdated results.
|
||||
::
|
||||
|
||||
### Understanding CLI Output
|
||||
|
||||
#### Source Scraper Output (Returns Embeds)
|
||||
```sh
|
||||
pnpm cli --source-id catflix --tmdb-id 11527
|
||||
```
|
||||
|
||||
Example output:
|
||||
```js
|
||||
{
|
||||
embeds: [
|
||||
{
|
||||
embedId: 'turbovid',
|
||||
url: 'https://turbovid.eu/embed/DjncbDBEmbLW'
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Embed Scraper Output (Returns Streams)
|
||||
```sh
|
||||
pnpm cli --source-id turbovid --url "https://turbovid.eu/embed/DjncbDBEmbLW"
|
||||
```
|
||||
|
||||
Example output:
|
||||
```js
|
||||
{
|
||||
stream: [
|
||||
{
|
||||
type: 'hls',
|
||||
id: 'primary',
|
||||
playlist: 'https://proxy.fifthwit.net/m3u8-proxy?url=https%3A%2F%2Fqueenselti.pro%2Fwrofm%2Fuwu.m3u8&headers=%7B%22referer%22%3A%22https%3A%2F%2Fturbovid.eu%2F%22%2C%22origin%22%3A%22https%3A%2F%2Fturbovid.eu%22%7D',
|
||||
flags: [],
|
||||
captions: []
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Notice the proxied URL**: The `createM3U8ProxyUrl()` function creates URLs like `https://proxy.fifthwit.net/m3u8-proxy?url=...&headers=...` to handle protected streams. Read more about this in [Advanced Concepts](/in-depth/advanced-concepts).
|
||||
|
||||
#### Interactive Mode Flow
|
||||
```sh
|
||||
pnpm cli
|
||||
```
|
||||
|
||||
```
|
||||
✔ Select a fetcher mode · native
|
||||
✔ Select a source · catflix
|
||||
✔ TMDB ID · 11527
|
||||
✔ Media type · movie
|
||||
✓ Done!
|
||||
{
|
||||
embeds: [
|
||||
{
|
||||
embedId: 'turbovid',
|
||||
url: 'https://turbovid.eu/embed/DjncbDBEmbLW'
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Development Workflow
|
||||
|
||||
1. **Setup**: Create `.env` file and install dependencies
|
||||
2. **Research**: Study the target website's structure and player technology
|
||||
3. **Code**: Build your scraper following the established patterns
|
||||
4. **Register**: Add your scraper to `all.ts` with a unique rank
|
||||
5. **Test**: Use CLI to test with multiple different movies and TV shows
|
||||
6. **Iterate**: Fix issues and improve reliability
|
||||
7. **Submit**: Create pull request with thorough testing documentation
|
||||
|
||||
## Next Steps
|
||||
|
||||
Once your environment is set up:
|
||||
|
||||
1. Read [Provider System Overview](/in-depth/provider-system) to understand how scrapers work
|
||||
2. Read [Building Scrapers](/in-depth/building-scrapers) for a detailed implementation guide
|
||||
3. Check [Advanced Concepts](/in-depth/advanced-concepts) for error handling and best practices
|
||||
|
||||
::alert{type="info"}
|
||||
Always test your scrapers with multiple different movies and TV shows to ensure reliability across different content types.
|
||||
::
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
# Flags
|
||||
|
||||
Flags is the primary way the library separates entities between different environments.
|
||||
For example, some sources only give back content that has the CORS headers set to allow anyone, so that source gets the flag `CORS_ALLOWED`. Now if you set your target to `BROWSER`, sources without that flag won't even get listed.
|
||||
|
||||
This concept is applied in multiple ways across the library.
|
||||
|
||||
## Flag options
|
||||
- `CORS_ALLOWED`: Headers from the output streams are set to allow any origin.
|
||||
- `IP_LOCKED`: The streams are locked by IP: requester and watcher must be the same.
|
||||
230
.docs/content/3.in-depth/2.provider-system.md
Normal file
230
.docs/content/3.in-depth/2.provider-system.md
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
# Provider System Overview
|
||||
|
||||
Understanding how the provider system works is crucial for building effective scrapers.
|
||||
|
||||
## The all.ts Registration System
|
||||
|
||||
All scrapers must be registered in `src/providers/all.ts`. This central file exports two main functions that gather all available providers:
|
||||
|
||||
```typescript
|
||||
// src/providers/all.ts
|
||||
import { Embed, Sourcerer } from '@/providers/base';
|
||||
import { embedsuScraper } from './sources/embedsu';
|
||||
import { turbovidScraper } from './embeds/turbovid';
|
||||
|
||||
export function gatherAllSources(): Array<Sourcerer> {
|
||||
return [
|
||||
cuevana3Scraper,
|
||||
catflixScraper,
|
||||
embedsuScraper, // Your source scraper goes here
|
||||
// ... more sources
|
||||
];
|
||||
}
|
||||
|
||||
export function gatherAllEmbeds(): Array<Embed> {
|
||||
return [
|
||||
upcloudScraper,
|
||||
turbovidScraper, // Your embed scraper goes here
|
||||
// ... more embeds
|
||||
];
|
||||
}
|
||||
```
|
||||
|
||||
**Why this matters:**
|
||||
- Only registered scrapers are available to the library
|
||||
- The order in these arrays doesn't matter (ranking determines priority)
|
||||
- You must import your scraper and add it to the appropriate function
|
||||
|
||||
## Provider Types
|
||||
|
||||
There are two distinct types of providers in the system:
|
||||
|
||||
### Sources (Primary Scrapers)
|
||||
**Sources** find content on websites and return either:
|
||||
- **Direct video streams** (ready to play immediately)
|
||||
- **Embed URLs** that need further processing by embed scrapers
|
||||
|
||||
**Characteristics:**
|
||||
- Handle website navigation and search
|
||||
- Process TMDB IDs to find content
|
||||
- Can return multiple server options
|
||||
- Located in `src/providers/sources/`
|
||||
|
||||
**Example source workflow:**
|
||||
1. Receive movie/show request with TMDB ID
|
||||
2. Search the target website for that content
|
||||
3. Extract embed player URLs or direct streams
|
||||
4. Return results for further processing
|
||||
|
||||
### Embeds (Secondary Scrapers)
|
||||
**Embeds** extract playable video streams from embed players:
|
||||
- Take URLs from sources as input
|
||||
- Handle player-specific extraction and decryption
|
||||
- Always return direct streams (never more embeds)
|
||||
|
||||
**Characteristics:**
|
||||
- Focus on one player type (turbovid, mixdrop, etc.)
|
||||
- Handle complex decryption/obfuscation
|
||||
- Specialized for specific player technologies
|
||||
- Located in `src/providers/embeds/`
|
||||
|
||||
**Example embed workflow:**
|
||||
1. Receive embed player URL from a source
|
||||
2. Fetch and parse the embed page
|
||||
3. Extract/decrypt the video stream URLs
|
||||
4. Return playable HLS or MP4 streams
|
||||
|
||||
## Ranking System
|
||||
|
||||
Every scraper has a **rank** that determines its priority in the execution queue:
|
||||
|
||||
### How Ranking Works
|
||||
- **Higher numbers = Higher priority** (processed first)
|
||||
- **Each rank must be unique** across all providers
|
||||
- Sources and embeds have separate ranking spaces
|
||||
- Failed scrapers are skipped, and the scraper with the next-highest rank is tried
|
||||
|
||||
### Rank Ranges
|
||||
```typescript
|
||||
// Typical rank ranges (not enforced, but conventional)
|
||||
Sources: 1-300
|
||||
Embeds: 1-250
|
||||
|
||||
// Example rankings
|
||||
export const embedsuScraper = makeSourcerer({
|
||||
id: 'embedsu',
|
||||
rank: 165, // Medium priority source
|
||||
// ...
|
||||
});
|
||||
|
||||
export const turbovidScraper = makeEmbed({
|
||||
id: 'turbovid',
|
||||
rank: 122, // Medium priority embed
|
||||
// ...
|
||||
});
|
||||
```
|
||||
|
||||
### Choosing a Rank
|
||||
|
||||
**For Sources:**
|
||||
- **200+**: High-quality, reliable sources (fast APIs, good uptime)
|
||||
- **100-199**: Medium reliability sources (most scrapers fall here)
|
||||
- **1-99**: Lower priority or experimental sources
|
||||
|
||||
**For Embeds:**
|
||||
- **200+**: Fast, reliable embeds (direct URLs, minimal processing)
|
||||
- **100-199**: Standard embeds (typical decryption/extraction)
|
||||
- **1-99**: Slow or unreliable embeds (complex decryption, poor uptime)
|
||||
|
||||
### Finding Available Ranks
|
||||
|
||||
Before choosing a rank, check what's already taken:
|
||||
|
||||
```sh
|
||||
# Search for existing ranks
|
||||
grep -r "rank:" src/providers/ | sort -t: -k3 -n
|
||||
```
|
||||
|
||||
Or check the all.ts file to see which scrapers are currently registered and their ranks.
|
||||
|
||||
::alert{type="warning"}
|
||||
**Duplicate ranks will cause conflicts!** Always verify your chosen rank is unique before submitting.
|
||||
::
|
||||
|
||||
## Provider Configuration
|
||||
|
||||
Each provider is configured using `makeSourcerer()` or `makeEmbed()`:
|
||||
|
||||
### Source Configuration
|
||||
```typescript
|
||||
export const mySourceScraper = makeSourcerer({
|
||||
id: 'my-source', // Unique identifier (kebab-case)
|
||||
name: 'My Source', // Display name (human-readable)
|
||||
rank: 150, // Priority rank (must be unique)
|
||||
disabled: false, // Whether scraper is disabled
|
||||
flags: [], // Feature flags (see Advanced Concepts)
|
||||
scrapeMovie: comboScraper, // Function for movies
|
||||
scrapeShow: comboScraper, // Function for TV shows
|
||||
});
|
||||
```
|
||||
|
||||
### Embed Configuration
|
||||
```typescript
|
||||
export const myEmbedScraper = makeEmbed({
|
||||
id: 'my-embed', // Unique identifier (kebab-case)
|
||||
name: 'My Embed', // Display name (human-readable)
|
||||
rank: 120, // Priority rank (must be unique)
|
||||
disabled: false, // Whether scraper is disabled
|
||||
async scrape(ctx) { // Single scrape function for embeds
|
||||
// ... extraction logic
|
||||
return { stream: [...] };
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
## How Providers Work Together
|
||||
|
||||
The provider system creates a powerful pipeline:
|
||||
|
||||
### 1. Source → Embed Chain
|
||||
```
|
||||
User Request → Source Scraper → Embed URLs → Embed Scraper → Video Stream → Player
|
||||
```
|
||||
|
||||
**Pipeline Steps:**
|
||||
1. **User Request** - User wants to watch content
|
||||
2. **Source Scraper** - Finds content on websites
|
||||
3. **Embed URLs** - Returns player URLs that need processing
|
||||
4. **Embed Scraper** - Extracts streams from player URLs
|
||||
5. **Video Stream** - Final playable stream
|
||||
6. **Player** - User watches the content
|
||||
|
||||
### 2. Multiple Server Options
|
||||
Sources can provide multiple backup servers:
|
||||
```typescript
|
||||
// Source returns multiple embed options
|
||||
return {
|
||||
embeds: [
|
||||
{ embedId: 'turbovid', url: 'https://turbovid.com/abc' },
|
||||
{ embedId: 'mixdrop', url: 'https://mixdrop.co/def' },
|
||||
{ embedId: 'dood', url: 'https://dood.watch/ghi' }
|
||||
]
|
||||
};
|
||||
```
|
||||
|
||||
### 3. Fallback System
|
||||
If one embed fails, the system tries the next:
|
||||
1. Try turbovid embed (rank 122)
|
||||
2. If fails, try mixdrop embed (rank 198)
|
||||
3. If fails, try dood embed (rank 173)
|
||||
4. Continue until success or all options exhausted
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Naming Conventions
|
||||
- **IDs**: Use kebab-case (`my-scraper`, not `myScraper` or `My_Scraper`)
|
||||
- **Names**: Use proper capitalization (`VidCloud`, not `vidcloud` or `VIDCLOUD`)
|
||||
- **Files**: Match the ID (`my-scraper.ts` for ID `my-scraper`)
|
||||
|
||||
### Registration Order
|
||||
- The order in `all.ts` arrays doesn't affect execution (rank does)
|
||||
- Group similar scrapers together for maintainability
|
||||
- Add imports at the top, organized logically
|
||||
|
||||
### Testing Integration
|
||||
Always test that your registration works:
|
||||
```sh
|
||||
# Verify your scraper appears in the list (interactive mode shows all available)
|
||||
pnpm cli
|
||||
|
||||
# Test your specific scraper
|
||||
pnpm cli --source-id my-scraper --tmdb-id 11527
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
Now that you understand the provider system:
|
||||
|
||||
1. Learn the details in [Building Scrapers](/in-depth/building-scrapers)
|
||||
2. Study [Advanced Concepts](/in-depth/advanced-concepts) for flags and error handling
|
||||
3. Look at the [Sources vs Embeds](/in-depth/sources-and-embeds) guide for more examples
|
||||
487
.docs/content/3.in-depth/3.building-scrapers.md
Normal file
487
.docs/content/3.in-depth/3.building-scrapers.md
Normal file
|
|
@ -0,0 +1,487 @@
|
|||
# Building Scrapers
|
||||
|
||||
This guide covers the technical details of implementing scrapers, from basic structure to advanced patterns.
|
||||
|
||||
## The Combo Scraper Pattern
|
||||
|
||||
The most common and recommended pattern is the "combo scraper" that handles both movies and TV shows with a single function. This reduces code duplication and ensures consistent behavior.
|
||||
|
||||
### Basic Structure
|
||||
|
||||
```typescript
|
||||
import { SourcererEmbed, SourcererOutput, makeSourcerer } from '@/providers/base';
|
||||
import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
|
||||
import { NotFoundError } from '@/utils/errors';
|
||||
|
||||
// Main scraping function that handles both movies and TV shows
|
||||
async function comboScraper(ctx: ShowScrapeContext | MovieScrapeContext): Promise<SourcererOutput> {
|
||||
// 1. Build the appropriate URL based on media type
|
||||
const embedUrl = `https://embed.su/embed/${
|
||||
ctx.media.type === 'movie'
|
||||
? `movie/${ctx.media.tmdbId}`
|
||||
: `tv/${ctx.media.tmdbId}/${ctx.media.season.number}/${ctx.media.episode.number}`
|
||||
}`;
|
||||
|
||||
// 2. Fetch the embed page using proxied fetcher
|
||||
const embedPage = await ctx.proxiedFetcher<string>(embedUrl, {
|
||||
headers: {
|
||||
Referer: 'https://embed.su/',
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
|
||||
},
|
||||
});
|
||||
|
||||
// 3. Extract and decode configuration
|
||||
const vConfigMatch = embedPage.match(/window\.vConfig\s*=\s*JSON\.parse\(atob\(`([^`]+)/i);
|
||||
const encodedConfig = vConfigMatch?.[1];
|
||||
if (!encodedConfig) throw new NotFoundError('No encoded config found');
|
||||
|
||||
// 4. Process the data (decode, decrypt, etc.)
|
||||
const decodedConfig = JSON.parse(await stringAtob(encodedConfig));
|
||||
if (!decodedConfig?.hash) throw new NotFoundError('No stream hash found');
|
||||
|
||||
// 5. Update progress to show we're making progress
|
||||
ctx.progress(50);
|
||||
|
||||
// 6. Build the final result
|
||||
const embeds: SourcererEmbed[] = secondDecode.map((server) => ({
|
||||
embedId: 'viper', // ID of the embed scraper to handle this URL
|
||||
url: `https://embed.su/api/e/${server.hash}`,
|
||||
}));
|
||||
|
||||
ctx.progress(90);
|
||||
|
||||
return { embeds };
|
||||
}
|
||||
|
||||
// Export the scraper configuration
|
||||
export const embedsuScraper = makeSourcerer({
|
||||
id: 'embedsu', // Unique identifier
|
||||
name: 'embed.su', // Display name
|
||||
rank: 165, // Priority rank (must be unique)
|
||||
disabled: false, // Whether the scraper is disabled
|
||||
flags: [], // Feature flags (see Advanced Concepts)
|
||||
scrapeMovie: comboScraper, // Function for movies
|
||||
scrapeShow: comboScraper, // Function for TV shows
|
||||
});
|
||||
```
|
||||
|
||||
### Alternative: Separate Functions
|
||||
|
||||
Use separate functions for complex cases where movie and TV show logic differs significantly. However, it's best to use the combo scraper whenever possible!
|
||||
|
||||
```typescript
|
||||
async function scrapeMovie(ctx: MovieScrapeContext): Promise<SourcererOutput> {
|
||||
// Movie-specific logic
|
||||
const movieUrl = `${baseUrl}/movie/${ctx.media.tmdbId}`;
|
||||
// ... movie processing
|
||||
}
|
||||
|
||||
async function scrapeShow(ctx: ShowScrapeContext): Promise<SourcererOutput> {
|
||||
// TV show-specific logic
|
||||
const showUrl = `${baseUrl}/tv/${ctx.media.tmdbId}/${ctx.media.season.number}/${ctx.media.episode.number}`;
|
||||
// ... show processing
|
||||
}
|
||||
|
||||
export const myScraper = makeSourcerer({
|
||||
id: 'my-scraper',
|
||||
name: 'My Scraper',
|
||||
rank: 150,
|
||||
disabled: false,
|
||||
flags: [],
|
||||
scrapeMovie: scrapeMovie, // Separate functions
|
||||
scrapeShow: scrapeShow,
|
||||
});
|
||||
```
|
||||
|
||||
## Return Types
|
||||
|
||||
A `SourcererOutput` can contain two types of data. Understanding when to use each is crucial:
|
||||
|
||||
### 1. Embeds Array (Most Common)
|
||||
|
||||
Use when your scraper finds embed players that need further processing:
|
||||
|
||||
```typescript
|
||||
return {
|
||||
embeds: [
|
||||
{
|
||||
embedId: 'turbovid', // Must match an existing embed scraper ID
|
||||
url: 'https://turbovid.com/embed/abc123'
|
||||
},
|
||||
{
|
||||
embedId: 'mixdrop', // Backup option
|
||||
url: 'https://mixdrop.co/embed/def456'
|
||||
}
|
||||
]
|
||||
};
|
||||
```
|
||||
|
||||
**When to use:**
|
||||
- Your scraper finds embed player URLs
|
||||
- You want to leverage existing embed scrapers
|
||||
- The site uses common players (turbovid, mixdrop, etc.)
|
||||
- You want to provide multiple server options
|
||||
|
||||
### 2. Stream Array (Direct Streams)
|
||||
|
||||
Use when your scraper finds direct video streams that are ready to play:
|
||||
|
||||
```typescript
|
||||
import { flags } from '@/entrypoint/utils/targets';
|
||||
|
||||
// For HLS streams
|
||||
return {
|
||||
embeds: [], // Can be empty when returning streams
|
||||
stream: [
|
||||
{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: streamUrl,
|
||||
flags: [flags.CORS_ALLOWED],
|
||||
captions: [], // Subtitle tracks (optional)
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
// For MP4 files with a single quality
|
||||
return {
|
||||
embeds: [],
|
||||
stream: [
|
||||
{
|
||||
id: 'primary',
|
||||
captions,
|
||||
qualities: {
|
||||
unknown: {
|
||||
type: 'mp4',
|
||||
url: streamUrl,
|
||||
},
|
||||
},
|
||||
type: 'file',
|
||||
flags: [flags.CORS_ALLOWED],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// For MP4 files with multiple qualities:
|
||||
// It's recommended to return it using a function similar to this:
|
||||
|
||||
const streams = Object.entries(data.streams).reduce((acc: Record<string, string>, [quality, url]) => {
|
||||
let qualityKey: number;
|
||||
if (quality === 'ORG') {
|
||||
// Only add unknown quality if it's an mp4 (handle URLs with query parameters)
|
||||
const urlPath = url.split('?')[0]; // Remove query parameters
|
||||
if (urlPath.toLowerCase().endsWith('.mp4')) {
|
||||
acc.unknown = url;
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
if (quality === '4K') {
|
||||
qualityKey = 2160;
|
||||
} else {
|
||||
qualityKey = parseInt(quality.replace('P', ''), 10);
|
||||
}
|
||||
if (Number.isNaN(qualityKey) || acc[qualityKey]) return acc;
|
||||
acc[qualityKey] = url;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
// Filter qualities based on provider type
|
||||
const filteredStreams = Object.entries(streams).reduce((acc: Record<string, string>, [quality, url]) => {
|
||||
// Skip unknown for cached provider
|
||||
if (provider.useCacheUrl && quality === 'unknown') {
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc[quality] = url;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
// Returning each quality like so
|
||||
return {
|
||||
stream: [
|
||||
{
|
||||
id: 'primary',
|
||||
captions: [],
|
||||
qualities: {
|
||||
...(filteredStreams[2160] && {
|
||||
'4k': {
|
||||
type: 'mp4',
|
||||
url: filteredStreams[2160],
|
||||
},
|
||||
}),
|
||||
...(filteredStreams[1080] && {
|
||||
1080: {
|
||||
type: 'mp4',
|
||||
url: filteredStreams[1080],
|
||||
},
|
||||
}),
|
||||
...(filteredStreams[720] && {
|
||||
720: {
|
||||
type: 'mp4',
|
||||
url: filteredStreams[720],
|
||||
},
|
||||
}),
|
||||
...(filteredStreams[480] && {
|
||||
480: {
|
||||
type: 'mp4',
|
||||
url: filteredStreams[480],
|
||||
},
|
||||
}),
|
||||
...(filteredStreams[360] && {
|
||||
360: {
|
||||
type: 'mp4',
|
||||
url: filteredStreams[360],
|
||||
},
|
||||
}),
|
||||
...(filteredStreams.unknown && {
|
||||
unknown: {
|
||||
type: 'mp4',
|
||||
url: filteredStreams.unknown,
|
||||
},
|
||||
}),
|
||||
},
|
||||
type: 'file',
|
||||
flags: [flags.CORS_ALLOWED],
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
**When to use:**
|
||||
- Your scraper can extract direct video URLs
|
||||
- The site provides its own player technology
|
||||
- You need fine control over stream handling
|
||||
- The streams don't require complex embed processing
|
||||
|
||||
## Context and Utilities
|
||||
|
||||
The scraper context (`ctx`) provides everything you need for implementation:
|
||||
|
||||
### Media Information
|
||||
```typescript
|
||||
// Basic media info (always available)
|
||||
ctx.media.title // "Spirited Away"
|
||||
ctx.media.type // "movie" | "show"
|
||||
ctx.media.tmdbId // 129
|
||||
ctx.media.releaseYear // 2001
|
||||
ctx.media.imdbId // "tt0245429" (when available)
|
||||
|
||||
// For TV shows only (check ctx.media.type === 'show')
|
||||
ctx.media.season.number // 1
|
||||
ctx.media.season.tmdbId // Season TMDB ID
|
||||
ctx.media.episode.number // 5
|
||||
ctx.media.episode.tmdbId // Episode TMDB ID
|
||||
```
|
||||
|
||||
### HTTP Client
|
||||
```typescript
|
||||
// Always use proxiedFetcher for external requests to avoid CORS
|
||||
const response = await ctx.proxiedFetcher<string>('https://example.com/api', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0...',
|
||||
'Referer': 'https://example.com'
|
||||
},
|
||||
body: JSON.stringify({ key: 'value' })
|
||||
});
|
||||
|
||||
// For API calls with base URL
|
||||
const data = await ctx.proxiedFetcher('/search', {
|
||||
baseUrl: 'https://api.example.com',
|
||||
query: { q: ctx.media.title, year: ctx.media.releaseYear }
|
||||
});
|
||||
```
|
||||
|
||||
### Progress Updates
|
||||
```typescript
|
||||
// Update the loading indicator (0-100)
|
||||
ctx.progress(25); // Found media page
|
||||
// ... processing ...
|
||||
ctx.progress(50); // Extracted embed links
|
||||
// ... more processing ...
|
||||
ctx.progress(90); // Almost done
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### 1. URL Building
|
||||
```typescript
|
||||
// Handle different media types
|
||||
const buildUrl = (ctx: ShowScrapeContext | MovieScrapeContext) => {
|
||||
const apiUrl = ctx.media.type === 'movie'
|
||||
? `${baseUrl}/movie/${ctx.media.tmdbId}`
|
||||
: `${baseUrl}/tv/${ctx.media.tmdbId}/${ctx.media.season.number}/${ctx.media.episode.number}`;
|
||||
|
||||
return apiUrl;
|
||||
};
|
||||
```
|
||||
|
||||
### 2. Data Extraction
|
||||
```typescript
|
||||
import { load } from 'cheerio';
|
||||
|
||||
// Scraping with Cheerio
|
||||
const $ = load(embedPage);
|
||||
const embedUrls = $('iframe[src*="turbovid"]')
|
||||
.map((_, el) => $(el).attr('src'))
|
||||
.get()
|
||||
.filter(Boolean);
|
||||
|
||||
// Regex extraction
|
||||
const configMatch = embedPage.match(/window\.playerConfig\s*=\s*({.*?});/s);
|
||||
if (configMatch) {
|
||||
const config = JSON.parse(configMatch[1]);
|
||||
// Process config...
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Error Handling
|
||||
```typescript
|
||||
import { NotFoundError } from '@/utils/errors';
|
||||
|
||||
// Throw NotFoundError for content not found
|
||||
if (!embedUrls.length) {
|
||||
throw new NotFoundError('No embed players found');
|
||||
}
|
||||
|
||||
// Throw generic Error for other issues
|
||||
if (!apiResponse.success) {
|
||||
throw new Error(`API request failed: ${apiResponse.message}`);
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Protected Streams
|
||||
```typescript
|
||||
import { createM3U8ProxyUrl } from '@/utils/proxy';
|
||||
|
||||
// For streams that require special headers
|
||||
const streamHeaders = {
|
||||
'Referer': 'https://player.example.com/',
|
||||
'Origin': 'https://player.example.com',
|
||||
'User-Agent': 'Mozilla/5.0...'
|
||||
};
|
||||
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: createM3U8ProxyUrl(originalPlaylist, streamHeaders),
|
||||
flags: [], // Proxy handles CORS
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
```
|
||||
|
||||
## Building Embed Scrapers
|
||||
|
||||
Embed scrapers follow a simpler pattern since they only handle one URL type:
|
||||
|
||||
```typescript
|
||||
import { makeEmbed } from '@/providers/base';
|
||||
|
||||
export const myEmbedScraper = makeEmbed({
|
||||
id: 'my-embed',
|
||||
name: 'My Embed Player',
|
||||
rank: 120,
|
||||
async scrape(ctx) {
|
||||
// ctx.url contains the embed URL from a source
|
||||
|
||||
// 1. Fetch the embed page
|
||||
const embedPage = await ctx.proxiedFetcher(ctx.url);
|
||||
|
||||
// 2. Extract the stream URL (example with regex)
|
||||
const streamMatch = embedPage.match(/src:\s*["']([^"']+\.m3u8[^"']*)/);
|
||||
if (!streamMatch) {
|
||||
throw new NotFoundError('No stream found in embed');
|
||||
}
|
||||
|
||||
// 3. Return the stream
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: streamMatch[1],
|
||||
flags: [flags.CORS_ALLOWED],
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
## Testing Your Scrapers
|
||||
|
||||
### 1. Basic Testing
|
||||
```sh
|
||||
# Test your scraper with CLI
|
||||
pnpm cli --source-id my-scraper --tmdb-id 11527
|
||||
|
||||
# Test different content types
|
||||
pnpm cli --source-id my-scraper --tmdb-id 94605 --season 1 --episode 1 # TV show
|
||||
```
|
||||
|
||||
### 2. Real CLI Output Examples
|
||||
|
||||
**Testing a source that returns embeds:**
|
||||
```sh
|
||||
pnpm cli --source-id catflix --tmdb-id 11527
|
||||
```
|
||||
```js
|
||||
{
|
||||
embeds: [
|
||||
{
|
||||
embedId: 'turbovid',
|
||||
url: 'https://turbovid.eu/embed/DjncbDBEmbLW'
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Testing an embed that returns streams:**
|
||||
```sh
|
||||
pnpm cli --source-id turbovid --url "https://turbovid.eu/embed/DjncbDBEmbLW"
|
||||
```
|
||||
```js
|
||||
{
|
||||
stream: [
|
||||
{
|
||||
type: 'hls',
|
||||
id: 'primary',
|
||||
playlist: 'https://proxy.fifthwit.net/m3u8-proxy?url=https%3A%2F%2Fqueenselti.pro%2Fwrofm%2Fuwu.m3u8&headers=%7B%22referer%22%3A%22https%3A%2F%2Fturbovid.eu%2F%22%2C%22origin%22%3A%22https%3A%2F%2Fturbovid.eu%22%7D',
|
||||
flags: [],
|
||||
captions: []
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Notice**: The playlist URL shows how `createM3U8ProxyUrl()` creates proxied URLs to handle protected streams.
|
||||
|
||||
### 3. Comprehensive Testing
|
||||
Test with various content:
|
||||
- Popular movies (The Shining: 11527, Spirited Away: 129, Avatar: 19995)
|
||||
- Recent releases (check current popular movies)
|
||||
- TV shows with multiple seasons
|
||||
- Anime series (different episode numbering)
|
||||
- Different languages/regions
|
||||
|
||||
### 4. Debug Mode
|
||||
```typescript
|
||||
// Add debug logging to your scraper
|
||||
console.log('Fetching URL:', embedUrl);
|
||||
console.log('Response status:', response.status);
|
||||
console.log('Extracted data:', extractedData);
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
Once you've built your scraper:
|
||||
|
||||
1. Test thoroughly with multiple content types
|
||||
2. Check [Advanced Concepts](/in-depth/advanced-concepts) for flags and error handling
|
||||
3. Register in `all.ts` with a unique rank
|
||||
4. Submit a pull request with testing documentation
|
||||
|
||||
::alert{type="warning"}
|
||||
Always test your scrapers with both movies and TV shows, and include multiple examples in your pull request description.
|
||||
::
|
||||
169
.docs/content/3.in-depth/4.flags.md
Normal file
169
.docs/content/3.in-depth/4.flags.md
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
# Flags
|
||||
|
||||
Flags are the primary way the library separates entities between different environments and indicates special properties of streams.
|
||||
|
||||
For example, some sources only give back content that has the CORS headers set to allow anyone, so that source gets the flag `CORS_ALLOWED`. Now if you set your target to `BROWSER`, sources without that flag won't even get listed.
|
||||
|
||||
Sometimes a source will block Netlify or Cloudflare, making self-hosted proxies on P-Stream impossible. In cases where this would break some users' experience, we should require the extension.
|
||||
|
||||
## Available Flags
|
||||
|
||||
- **`CORS_ALLOWED`**: Headers from the output streams are set to allow any origin.
|
||||
- **`IP_LOCKED`**: The streams are locked by IP: requester and watcher must be the same.
|
||||
- **`CF_BLOCKED`**: *(Cosmetic)* Indicates the source/embed blocks Cloudflare IPs. For actual enforcement, remove `CORS_ALLOWED` or add `IP_LOCKED`.
|
||||
- **`PROXY_BLOCKED`**: *(Cosmetic)* Indicates streams shouldn't be proxied. For actual enforcement, remove `CORS_ALLOWED` or add `IP_LOCKED`.
|
||||
|
||||
## How Flags Affect Target Compatibility
|
||||
|
||||
### Stream-Level Flags Impact
|
||||
|
||||
**With `CORS_ALLOWED`:**
|
||||
- ✅ Browser targets (can fetch and play streams)
|
||||
- ✅ Extension targets (bypass needed restrictions)
|
||||
- ✅ Native targets (direct stream access)
|
||||
|
||||
**Without `CORS_ALLOWED`:**
|
||||
- ❌ Browser targets (CORS restrictions block access)
|
||||
- ✅ Extension targets (can bypass CORS)
|
||||
- ✅ Native targets (no CORS restrictions)
|
||||
|
||||
**With `IP_LOCKED`:**
|
||||
- ❌ Proxy setups (different IP between request and playback)
|
||||
- ✅ Direct connections (same IP for request and playback)
|
||||
- ✅ Extension targets (when user has consistent IP)
|
||||
|
||||
**With `CF_BLOCKED` *(cosmetic only)*:**
|
||||
- 🏷️ Informational label indicating Cloudflare issues
|
||||
- ⚠️ **Still requires removing `CORS_ALLOWED` or adding `IP_LOCKED` for actual enforcement**
|
||||
|
||||
**With `PROXY_BLOCKED` *(cosmetic only)*:**
|
||||
- 🏷️ Informational label indicating proxy incompatibility
|
||||
- ⚠️ **Still requires removing `CORS_ALLOWED` or adding `IP_LOCKED` for actual enforcement**
|
||||
|
||||
### Provider-Level Flags Impact
|
||||
|
||||
**With `CORS_ALLOWED`:**
|
||||
- Source appears for all target types
|
||||
- Individual streams still need appropriate flags
|
||||
|
||||
**Without `CORS_ALLOWED`:**
|
||||
- Source only appears for extension/native targets
|
||||
- Hidden entirely from browser-only users
|
||||
|
||||
### Important: Cosmetic vs Enforcement Flags
|
||||
|
||||
**Cosmetic flags** (`CF_BLOCKED`, `PROXY_BLOCKED`) are informational labels only. They don't enforce any behavior.
|
||||
|
||||
**Enforcement flags** (`CORS_ALLOWED`, `IP_LOCKED`) actually control stream compatibility:
|
||||
- **Remove all flags**: Most common way to make streams extension/native-only (no browser support)
|
||||
- **Add `IP_LOCKED`**: Prevents proxy usage when `consistentIpForRequests` is false (rarely needed - most extension-only streams just use no flags)
|
||||
|
||||
### The Golden Rule
|
||||
|
||||
**Extension-only providers:** Remove all flags (most common case) when streams only work with extensions (e.g., need special headers or IP restrictions that only extensions can bypass).
|
||||
|
||||
**Universal providers:** Include `CORS_ALLOWED` flags when using M3U8 proxies or streams that can work across all targets.
|
||||
|
||||
## Comprehensive Flags Guide
|
||||
|
||||
For detailed information about using flags in your scrapers, including:
|
||||
- When and how to use each flag
|
||||
- Provider-level vs stream-level flags
|
||||
- Best practices and examples
|
||||
- How flags affect stream playback
|
||||
|
||||
See the [Flags System section](/in-depth/advanced-concepts#flags-system) in Advanced Concepts.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
```typescript
|
||||
import { flags } from '@/entrypoint/utils/targets';
|
||||
import { createM3U8ProxyUrl } from '@/utils/proxy';
|
||||
|
||||
// Extension-only streams (MOST COMMON - just remove all flags)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: createM3U8ProxyUrl(originalUrl, headers),
|
||||
flags: [], // No flags = extension/native only
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// Universal streams with CORS support
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: createM3U8ProxyUrl(originalUrl, headers),
|
||||
flags: [flags.CORS_ALLOWED], // Works across all targets
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// Direct streams (no proxy needed)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: 'https://example.com/playlist.m3u8',
|
||||
flags: [flags.CORS_ALLOWED], // Stream can be played directly in browsers
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// Extension-only streams (usual approach - just remove all flags)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: 'https://example.com/playlist.m3u8',
|
||||
flags: [], // No flags = extension/native only (most common)
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// Cloudflare-blocked streams with cosmetic label (if needed)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: 'https://example.com/playlist.m3u8',
|
||||
flags: [flags.CF_BLOCKED], // Cosmetic only - still extension/native only due to no CORS_ALLOWED
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// IP-locked streams (when you specifically need consistent IP)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: 'https://example.com/playlist.m3u8',
|
||||
flags: [flags.IP_LOCKED], // Prevents proxy usage when IP consistency required
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// IP-locked streams (when you specifically need consistent IP)
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: 'https://example.com/playlist.m3u8',
|
||||
flags: [flags.IP_LOCKED], // Prevents proxy usage when IP consistency required
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
|
||||
// Provider-level flags affect source visibility
|
||||
export const myScraper = makeSourcerer({
|
||||
id: 'my-scraper',
|
||||
name: 'My Scraper',
|
||||
rank: 150,
|
||||
flags: [flags.CORS_ALLOWED], // Source shows for all targets
|
||||
scrapeMovie: comboScraper,
|
||||
scrapeShow: comboScraper,
|
||||
});
|
||||
```
|
||||
228
.docs/content/3.in-depth/5.advanced-concepts.md
Normal file
228
.docs/content/3.in-depth/5.advanced-concepts.md
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
# Advanced Concepts
|
||||
|
||||
This guide covers advanced topics for building robust and reliable scrapers.
|
||||
|
||||
## Stream Protection and Proxying
|
||||
|
||||
Modern streaming services use various protection mechanisms.
|
||||
|
||||
### Common Protections
|
||||
|
||||
1. **Referer Checking** - URLs only work from specific domains
|
||||
2. **CORS Restrictions** - Prevent browser access from unauthorized origins
|
||||
3. **Geographic Blocking** - IP-based access restrictions
|
||||
4. **Time-Limited Tokens** - URLs expire after short periods
|
||||
5. **User-Agent Filtering** - Only allow specific browsers/clients
|
||||
|
||||
### Handling Protected Streams
|
||||
|
||||
**Use M3U8 proxy for HLS streams:**
|
||||
|
||||
Using the `createM3U8ProxyUrl` function, we can use our configured M3U8 proxy to send headers to the playlist and all its segments.
|
||||
|
||||
```typescript
|
||||
import { createM3U8ProxyUrl } from '@/utils/proxy';
|
||||
|
||||
// Extract the original stream URL
|
||||
const originalPlaylist = 'https://protected-cdn.example.com/playlist.m3u8';
|
||||
|
||||
// Headers required by the streaming service
|
||||
const streamHeaders = {
|
||||
'Referer': 'https://player.example.com/',
|
||||
'Origin': 'https://player.example.com',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||||
};
|
||||
|
||||
// Create proxied URL that handles headers and CORS
|
||||
const proxiedUrl = createM3U8ProxyUrl(originalPlaylist, streamHeaders);
|
||||
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: proxiedUrl, // Use proxied URL
|
||||
flags: [flags.CORS_ALLOWED], // Proxy enables CORS for all targets
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
```
|
||||
|
||||
### Stream Validation Bypass
|
||||
|
||||
When using M3U8 proxies that are origin-locked (like P-Stream proxies), you may need to bypass automatic stream validation in `valid.ts`:
|
||||
|
||||
```typescript
|
||||
// In src/utils/valid.ts, add your scraper ID to skip validation
|
||||
const SKIP_VALIDATION_CHECK_IDS = [
|
||||
// ... existing IDs
|
||||
'your-scraper-id', // Add your scraper ID here
|
||||
];
|
||||
```
|
||||
|
||||
**Why this is needed:**
|
||||
- By default, all streams are validated by attempting to fetch metadata
|
||||
- The validation uses `proxiedFetcher` to check if streams are playable
|
||||
- If your proxy blocks the fetcher (origin-locked), validation will fail
|
||||
- But the proxied URL should still work in the actual player context
|
||||
- Adding to skip list bypasses validation and returns the proxied URL directly without checking it
|
||||
|
||||
**When to skip validation:**
|
||||
- Your scraper uses origin-locked proxies
|
||||
- The proxy service blocks programmatic access
|
||||
- Validation consistently fails but streams work in browsers
|
||||
- You're certain the proxy setup is correct
|
||||
|
||||
**Use setupProxy for MP4 streams:**
|
||||
When a stream response includes custom headers, the extension or a native app is usually needed to send those headers with the request.
|
||||
|
||||
```typescript
|
||||
import { setupProxy } from '@/utils/proxy';
|
||||
|
||||
let stream = {
|
||||
id: 'primary',
|
||||
type: 'file',
|
||||
flags: [],
|
||||
qualities: {
|
||||
1080: { type: 'mp4', url: 'https://protected-cdn.example.com/video.mp4' }
|
||||
},
|
||||
headers: {
|
||||
'Referer': 'https://player.example.com/',
|
||||
'User-Agent': 'Mozilla/5.0...'
|
||||
},
|
||||
captions: []
|
||||
};
|
||||
|
||||
// setupProxy will handle proxying if needed
|
||||
stream = setupProxy(stream);
|
||||
|
||||
return { stream: [stream] };
|
||||
```
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Efficient Data Extraction
|
||||
|
||||
**Use targeted selectors:**
|
||||
```typescript
|
||||
// ✅ Good - specific selector
|
||||
const embedUrl = $('iframe[src*="turbovid"]').attr('src');
|
||||
|
||||
// ❌ Bad - searches entire document
|
||||
const embedUrl = $('*').filter((_, el) => $(el).attr('src')?.includes('turbovid')).attr('src');
|
||||
```
|
||||
|
||||
**Cache expensive operations:**
|
||||
```typescript
|
||||
// Cache parsed data to avoid re-parsing
|
||||
let cachedConfig;
|
||||
if (!cachedConfig) {
|
||||
cachedConfig = JSON.parse(configString);
|
||||
}
|
||||
```
|
||||
|
||||
### Minimize HTTP Requests
|
||||
|
||||
**Combine operations when possible:**
|
||||
```typescript
|
||||
// ✅ Good - single request with full processing
|
||||
const embedPage = await ctx.proxiedFetcher(embedUrl);
|
||||
const streams = extractAllStreams(embedPage);
|
||||
|
||||
// ❌ Bad - multiple requests for same page
|
||||
const page1 = await ctx.proxiedFetcher(embedUrl);
|
||||
const config = extractConfig(page1);
|
||||
const page2 = await ctx.proxiedFetcher(embedUrl); // Duplicate request
|
||||
const streams = extractStreams(page2);
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Input Validation
|
||||
|
||||
**Validate external data:**
|
||||
```typescript
|
||||
// Validate URLs before using them
|
||||
const isValidUrl = (url: string) => {
|
||||
try {
|
||||
new URL(url);
|
||||
return url.startsWith('http://') || url.startsWith('https://');
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
if (!isValidUrl(streamUrl)) {
|
||||
throw new Error('Invalid stream URL received');
|
||||
}
|
||||
```
|
||||
|
||||
**Sanitize regex inputs:**
|
||||
```typescript
|
||||
// Be careful with dynamic regex
|
||||
const safeTitle = ctx.media.title.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
const titleRegex = new RegExp(safeTitle, 'i');
|
||||
```
|
||||
|
||||
### Safe JSON Parsing
|
||||
|
||||
```typescript
|
||||
// Handle malformed JSON gracefully
|
||||
let config;
|
||||
try {
|
||||
config = JSON.parse(configString);
|
||||
} catch (error) {
|
||||
throw new Error('Invalid configuration format');
|
||||
}
|
||||
|
||||
// Validate expected structure
|
||||
if (!config || typeof config !== 'object' || !config.streams) {
|
||||
throw new Error('Invalid configuration structure');
|
||||
}
|
||||
```
|
||||
|
||||
## Testing and Debugging
|
||||
|
||||
### Debug Logging
|
||||
|
||||
```typescript
|
||||
// Add temporary debug logs (remove before submitting)
|
||||
console.log('Request URL:', requestUrl);
|
||||
console.log('Response headers:', response.headers);
|
||||
console.log('Extracted data:', extractedData);
|
||||
```
|
||||
|
||||
### Test Edge Cases
|
||||
|
||||
- Content with special characters in titles
|
||||
- Very new releases (might not be available)
|
||||
- Old content (might have different URL patterns)
|
||||
- Different regions (geographic restrictions)
|
||||
- Different quality levels
|
||||
|
||||
### Common Debugging Steps
|
||||
|
||||
1. **Verify URLs are correct**
|
||||
2. **Check HTTP status codes**
|
||||
3. **Inspect response headers**
|
||||
4. **Validate extracted data structure**
|
||||
5. **Test with different content types**
|
||||
|
||||
## Best Practices Summary
|
||||
|
||||
1. **Always use `ctx.proxiedFetcher`** for external requests
|
||||
2. **Throw `NotFoundError`** for content-not-found scenarios
|
||||
3. **Update progress** at meaningful milestones
|
||||
4. **Use appropriate flags** for stream capabilities
|
||||
5. **Handle protected streams** with proxy functions
|
||||
6. **Validate external data** before using it
|
||||
7. **Test thoroughly** with diverse content
|
||||
8. **Document your implementation** in pull requests
|
||||
|
||||
## Next Steps
|
||||
|
||||
With these advanced concepts:
|
||||
|
||||
1. Review [Sources vs Embeds](/extra-topics/sources-and-embeds) for architectural patterns
|
||||
2. Study existing scrapers in `src/providers/` for real examples
|
||||
3. Test your implementation thoroughly
|
||||
4. Submit pull requests with detailed testing documentation
|
||||
|
|
@ -1,3 +1,3 @@
|
|||
icon: ph:atom-fill
|
||||
navigation.redirect: /in-depth/sources-and-embeds
|
||||
navigation.redirect: /in-depth/new-providers
|
||||
navigation.title: "In-depth"
|
||||
|
|
|
|||
|
|
@ -1,72 +0,0 @@
|
|||
# Development / contributing
|
||||
|
||||
::alert{type="warning"}
|
||||
This page isn't quite done yet, stay tuned!
|
||||
::
|
||||
|
||||
<!--
|
||||
TODO
|
||||
- Development setup
|
||||
- How to make new sources/embeds (link to the page)
|
||||
- How to use the fetchers, when to use proxiedFetcher
|
||||
- How to use the context
|
||||
-->
|
||||
|
||||
## Testing using the CLI
|
||||
|
||||
Testing can be quite difficult for this library, unit tests can't really be made because of the unreliable nature of scrapers.
|
||||
But manually testing by writing an entry-point is also really annoying.
|
||||
|
||||
Our solution is to make a CLI that you can use to run the scrapers. For everything else there are unit tests.
|
||||
|
||||
### Setup
|
||||
Make a `.env` file in the root of the repository and add a TMDB API key: `MOVIE_WEB_TMDB_API_KEY=KEY_HERE`.
|
||||
Then make sure you've run `npm i` to get all the dependencies.
|
||||
|
||||
### Mode 1 - interactive
|
||||
|
||||
To run the CLI without needing to learn all the arguments, simply run the following command and go with the flow.
|
||||
|
||||
```sh
|
||||
npm run cli
|
||||
```
|
||||
|
||||
### Mode 2 - arguments
|
||||
|
||||
For repeatability, it can be useful to specify the arguments one by one.
|
||||
To see all the arguments, you can run the help command:
|
||||
```sh
|
||||
npm run cli -- -h
|
||||
```
|
||||
|
||||
Then just run it with your arguments, for example:
|
||||
```sh
|
||||
npm run cli -- -sid showbox -tid 556574
|
||||
```
|
||||
|
||||
### Examples
|
||||
|
||||
```sh
|
||||
# Spirited away - showbox
|
||||
npm run cli -- -sid showbox -tid 129
|
||||
|
||||
# Hamilton - flixhq
|
||||
npm run cli -- -sid flixhq -tid 556574
|
||||
|
||||
# Arcane S1E1 - zoechip
|
||||
npm run cli -- -sid zoechip -tid 94605 -s 1 -e 1
|
||||
|
||||
# febbox mp4 - get streams from an embed (gotten from a source output)
|
||||
npm run cli -- -sid febbox-mp4 -u URL_HERE
|
||||
```
|
||||
|
||||
### Fetcher options
|
||||
|
||||
The CLI comes with a few built-in fetchers:
|
||||
- `node-fetch`: Fetch using the "node-fetch" library.
|
||||
- `native`: Use the new fetch built into Node.JS (undici).
|
||||
- `browser`: Start up headless chrome, and run the library in that context using a proxied fetcher.
|
||||
|
||||
::alert{type="warning"}
|
||||
The browser fetcher will require you to run `npm run build` before running the CLI. Otherwise you will get outdated results.
|
||||
::
|
||||
271
.docs/content/4.extra-topics/0.sources-and-embeds.md
Normal file
271
.docs/content/4.extra-topics/0.sources-and-embeds.md
Normal file
|
|
@ -0,0 +1,271 @@
|
|||
# Sources vs Embeds
|
||||
|
||||
Understanding the difference between sources and embeds is crucial for building scrapers effectively. They work together in a two-stage pipeline to extract playable video streams.
|
||||
|
||||
## The Two-Stage Pipeline
|
||||
|
||||
```
|
||||
User Request → Source Scraper → What did source find?
|
||||
↓
|
||||
┌─────────────┐
|
||||
↓ ↓
|
||||
Direct Stream Embed URLs
|
||||
↓ ↓
|
||||
Play Video Embed Scraper
|
||||
↓
|
||||
Extract Stream
|
||||
↓
|
||||
Play Video
|
||||
```
|
||||
|
||||
**Flow Breakdown:**
|
||||
1. **User requests** content (movie/TV show)
|
||||
2. **Source scraper** searches the target website
|
||||
3. **Source returns** either:
|
||||
- **Direct streams** → Ready to play immediately
|
||||
- **Embed URLs** → Need further processing
|
||||
4. **Embed scraper** (if needed) extracts streams from player URLs
|
||||
5. **Final result** → Playable video stream
|
||||
|
||||
## Sources: The Content Finders
|
||||
|
||||
**Sources** are the first stage - they find content on websites and return either:
|
||||
1. **Direct video streams** (ready to play)
|
||||
2. **Embed URLs** that need further processing
|
||||
|
||||
### Example: Autoembed Source
|
||||
|
||||
```typescript
|
||||
// From src/providers/sources/autoembed.ts
|
||||
async function comboScraper(ctx: ShowScrapeContext | MovieScrapeContext): Promise<SourcererOutput> {
|
||||
// 1. Call an API to find video sources
|
||||
const data = await ctx.proxiedFetcher(`/api/getVideoSource`, {
|
||||
baseUrl: 'https://tom.autoembed.cc',
|
||||
query: { type: mediaType, id }
|
||||
});
|
||||
|
||||
// 2. Return embed URLs for further processing
|
||||
return {
|
||||
embeds: [{
|
||||
embedId: 'autoembed-english', // Points to an embed scraper
|
||||
url: data.videoSource // URL that embed will process
|
||||
}]
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**What this source does:**
|
||||
- Queries an API with TMDB ID
|
||||
- Gets back a video source URL
|
||||
- Returns it as an embed for the `autoembed-english` embed scraper to handle
|
||||
|
||||
### Example: Catflix Source
|
||||
|
||||
```typescript
|
||||
// From src/providers/sources/catflix.ts
|
||||
async function comboScraper(ctx: ShowScrapeContext | MovieScrapeContext): Promise<SourcererOutput> {
|
||||
// 1. Build URL to the movie/show page
|
||||
const watchPageUrl = `${baseUrl}/movie/${mediaTitle}-${movieId}`;
|
||||
|
||||
// 2. Scrape the page for embedded player URLs
|
||||
const watchPage = await ctx.proxiedFetcher(watchPageUrl);
|
||||
const $ = load(watchPage);
|
||||
|
||||
// 3. Extract and decode the embed URL
|
||||
const mainOriginMatch = scriptData.data.match(/main_origin = "(.*?)";/);
|
||||
const decodedUrl = atob(mainOriginMatch[1]);
|
||||
|
||||
// 4. Return embed URL for turbovid embed to process
|
||||
return {
|
||||
embeds: [{
|
||||
embedId: 'turbovid', // Points to turbovid embed scraper
|
||||
url: decodedUrl // Turbovid player URL
|
||||
}]
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**What this source does:**
|
||||
- Scrapes a streaming website
|
||||
- Finds encoded embed player URLs in the page source
|
||||
- Decodes the URL and returns it for the `turbovid` embed scraper
|
||||
|
||||
## Embeds: The Stream Extractors
|
||||
|
||||
**Embeds** are the second stage - they take URLs from sources and extract the actual playable video streams. Each embed type knows how to handle a specific player or service.
|
||||
|
||||
### Example: Autoembed Embed (Simple)
|
||||
|
||||
```typescript
|
||||
// From src/providers/embeds/autoembed.ts
|
||||
async scrape(ctx) {
|
||||
// The URL from the source is already a direct HLS playlist
|
||||
return {
|
||||
stream: [{
|
||||
id: 'primary',
|
||||
type: 'hls',
|
||||
playlist: ctx.url, // Use the URL directly as HLS playlist
|
||||
flags: [flags.CORS_ALLOWED],
|
||||
captions: []
|
||||
}]
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**What this embed does:**
|
||||
- Takes the URL from autoembed source
|
||||
- Treats it as a direct HLS playlist (no further processing needed)
|
||||
- Returns it as a playable stream
|
||||
|
||||
### Example: Turbovid Embed (Complex)
|
||||
|
||||
```typescript
|
||||
// From src/providers/embeds/turbovid.ts
|
||||
async scrape(ctx) {
|
||||
// 1. Fetch the turbovid player page
|
||||
const embedPage = await ctx.proxiedFetcher(ctx.url);
|
||||
|
||||
// 2. Extract encryption keys from the page
|
||||
const apkey = embedPage.match(/const\s+apkey\s*=\s*"(.*?)";/)?.[1];
|
||||
const xxid = embedPage.match(/const\s+xxid\s*=\s*"(.*?)";/)?.[1];
|
||||
|
||||
// 3. Get decryption key from API
|
||||
const encodedJuiceKey = JSON.parse(
|
||||
await ctx.proxiedFetcher('/api/cucked/juice_key', { baseUrl })
|
||||
).juice;
|
||||
|
||||
// 4. Get encrypted playlist data
|
||||
const data = JSON.parse(
|
||||
await ctx.proxiedFetcher('/api/cucked/the_juice_v2/', {
|
||||
baseUrl, query: { [apkey]: xxid }
|
||||
})
|
||||
).data;
|
||||
|
||||
// 5. Decrypt the playlist URL
|
||||
const playlist = decrypt(data, atob(encodedJuiceKey));
|
||||
|
||||
// 6. Return proxied stream (handles CORS/headers)
|
||||
return {
|
||||
stream: [{
|
||||
type: 'hls',
|
||||
id: 'primary',
|
||||
playlist: createM3U8ProxyUrl(playlist, streamHeaders),
|
||||
flags: [], captions: []
|
||||
}]
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**What this embed does:**
|
||||
- Takes turbovid player URL from catflix source
|
||||
- Performs complex extraction: fetches page → gets keys → decrypts data
|
||||
- Returns the final HLS playlist with proper proxy handling
|
||||
|
||||
## Key Differences
|
||||
|
||||
| Sources | Embeds |
|
||||
|---------|--------|
|
||||
| **Find content** on websites | **Extract streams** from players |
|
||||
| Return embed URLs OR direct streams | Always return direct streams |
|
||||
| Handle website navigation/search | Handle player-specific extraction |
|
||||
| Can return multiple server options | Process one specific player type |
|
||||
| Example: "Find Avengers on Catflix" | Example: "Extract stream from Turbovid player" |
|
||||
|
||||
## Why This Separation?
|
||||
|
||||
### 1. **Reusability**
|
||||
Multiple sources can use the same embed:
|
||||
```typescript
|
||||
// Both catflix and other sources can return turbovid embeds
|
||||
{ embedId: 'turbovid', url: 'https://turbovid.com/player123' }
|
||||
```
|
||||
|
||||
### 2. **Multiple Server Options**
|
||||
Sources can provide backup servers:
|
||||
```typescript
|
||||
return {
|
||||
embeds: [
|
||||
{ embedId: 'turbovid', url: 'https://turbovid.com/player123' },
|
||||
{ embedId: 'vidcloud', url: 'https://vidcloud.co/embed456' },
|
||||
{ embedId: 'dood', url: 'https://dood.watch/789' }
|
||||
]
|
||||
};
|
||||
```
|
||||
|
||||
### 3. **Language/Quality Variants**
|
||||
Sources can offer different options:
|
||||
```typescript
|
||||
return {
|
||||
embeds: [
|
||||
{ embedId: 'autoembed-english', url: streamUrl },
|
||||
{ embedId: 'autoembed-spanish', url: streamUrlEs },
|
||||
{ embedId: 'autoembed-hindi', url: streamUrlHi }
|
||||
]
|
||||
};
|
||||
```
|
||||
|
||||
### 4. **Specialization**
|
||||
- **Sources** specialize in website structures and search
|
||||
- **Embeds** specialize in player technologies and decryption
|
||||
|
||||
## How They Work Together
|
||||
|
||||
### Flow Example: Finding "Spirited Away"
|
||||
|
||||
1. **Source (catflix)**:
|
||||
- Searches catflix.su for "Spirited Away"
|
||||
- Finds movie page with embedded player
|
||||
- Extracts turbovid URL: `https://turbovid.com/embed/abc123`
|
||||
- Returns: `{ embedId: 'turbovid', url: 'https://turbovid.com/embed/abc123' }`
|
||||
|
||||
2. **Embed (turbovid)**:
|
||||
- Receives the turbovid URL
|
||||
- Scrapes the player page for encryption keys
|
||||
- Decrypts the actual HLS playlist URL
|
||||
- Returns: `{ stream: [{ playlist: 'https://cdn.example.com/movie.m3u8' }] }`
|
||||
|
||||
3. **Result**: User can now play the video stream
|
||||
|
||||
### Error Handling Chain
|
||||
|
||||
If the embed fails to extract a stream:
|
||||
```typescript
|
||||
// Source provides multiple backup options
|
||||
return {
|
||||
embeds: [
|
||||
{ embedId: 'turbovid', url: url1 }, // Try first
|
||||
{ embedId: 'mixdrop', url: url2 }, // Fallback 1
|
||||
{ embedId: 'dood', url: url3 } // Fallback 2
|
||||
]
|
||||
};
|
||||
```
|
||||
|
||||
The system tries each embed in rank order until one succeeds.
|
||||
|
||||
## Best Practices
|
||||
|
||||
### For Sources:
|
||||
- Provide multiple embed options when possible
|
||||
- Use descriptive embed IDs that match existing embeds
|
||||
- Handle both movies and TV shows (combo scraper pattern)
|
||||
- Return direct streams when embed processing isn't needed
|
||||
|
||||
### For Embeds:
|
||||
- Focus on one player type per embed
|
||||
- Handle errors gracefully with clear error messages
|
||||
- Use proxy functions for protected streams
|
||||
- Include proper headers and flags
|
||||
|
||||
### Registration:
|
||||
```typescript
|
||||
// In src/providers/all.ts
|
||||
export function gatherAllSources(): Array<Sourcerer> {
|
||||
return [catflixScraper, autoembedScraper, /* ... */];
|
||||
}
|
||||
|
||||
export function gatherAllEmbeds(): Array<Embed> {
|
||||
return [turbovidScraper, autoembedEnglishScraper, /* ... */];
|
||||
}
|
||||
```
|
||||
|
||||
Both sources and embeds must be registered in `all.ts` to be available for use.
|
||||
|
|
@ -6,7 +6,7 @@ This is the main entry-point of the library. It is recommended to make one insta
|
|||
## Example
|
||||
|
||||
```ts
|
||||
import { targets, makeProviders, makeDefaultFetcher } from '@movie-web/providers';
|
||||
import { targets, makeProviders, makeDefaultFetcher } from '@p-stream/providers';
|
||||
|
||||
const providers = makeProviders({
|
||||
fetcher: makeDefaultFetcher(fetch),
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Run a specific source scraper and get its emitted streams.
|
|||
## Example
|
||||
|
||||
```ts
|
||||
import { SourcererOutput, NotFoundError } from '@movie-web/providers';
|
||||
import { SourcererOutput, NotFoundError } from '@p-stream/providers';
|
||||
|
||||
// media from TMDB
|
||||
const media = {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Run a specific embed scraper and get its emitted streams.
|
|||
## Example
|
||||
|
||||
```ts
|
||||
import { SourcererOutput } from '@movie-web/providers';
|
||||
import { EmbedOutput } from '@p-stream/providers';
|
||||
|
||||
// scrape a stream from upcloud
|
||||
let output: EmbedOutput;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Make a fetcher from a `fetch()` API. It is used for making an instance of provid
|
|||
## Example
|
||||
|
||||
```ts
|
||||
import { targets, makeProviders, makeDefaultFetcher } from '@movie-web/providers';
|
||||
import { targets, makeProviders, makeStandardFetcher } from '@p-stream/providers';
|
||||
|
||||
const providers = makeProviders({
|
||||
fetcher: makeStandardFetcher(fetch),
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# `makeSimpleProxyFetcher`
|
||||
|
||||
Make a fetcher to use with [movie-web/simple-proxy](https://github.com/movie-web/simple-proxy). This is for making a proxiedFetcher, so you can run this library in the browser.
|
||||
Make a fetcher to use with [p-stream/simple-proxy](https://github.com/p-stream/simple-proxy). This is for making a proxiedFetcher, so you can run this library in the browser.
|
||||
|
||||
## Example
|
||||
|
||||
```ts
|
||||
import { targets, makeProviders, makeDefaultFetcher, makeSimpleProxyFetcher } from '@movie-web/providers';
|
||||
import { targets, makeProviders, makeDefaultFetcher, makeSimpleProxyFetcher } from '@p-stream/providers';
|
||||
|
||||
const proxyUrl = 'https://your.proxy.workers.dev/'
|
||||
|
||||
|
|
|
|||
11432
.docs/pnpm-lock.yaml
11432
.docs/pnpm-lock.yaml
File diff suppressed because it is too large
Load diff
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
|
|
@ -1 +1 @@
|
|||
* @movie-web/project-leads
|
||||
* @p-stream/project-leads
|
||||
|
|
|
|||
2
.github/CODE_OF_CONDUCT.md
vendored
2
.github/CODE_OF_CONDUCT.md
vendored
|
|
@ -1 +1 @@
|
|||
Please visit the [main document at primary repository](https://github.com/movie-web/movie-web/blob/dev/.github/CODE_OF_CONDUCT.md).
|
||||
Please visit the [main document at primary repository](https://github.com/p-stream/p-stream/blob/dev/.github/CODE_OF_CONDUCT.md).
|
||||
|
|
|
|||
2
.github/CONTRIBUTING.md
vendored
2
.github/CONTRIBUTING.md
vendored
|
|
@ -1 +1 @@
|
|||
Please visit the [main document at primary repository](https://github.com/movie-web/movie-web/blob/dev/.github/CONTRIBUTING.md).
|
||||
Please visit the [main document at primary repository](https://github.com/p-stream/p-stream/blob/dev/.github/CONTRIBUTING.md).
|
||||
|
|
|
|||
10
.github/SECURITY.md
vendored
10
.github/SECURITY.md
vendored
|
|
@ -2,13 +2,13 @@
|
|||
|
||||
## Supported Versions
|
||||
|
||||
The movie-web maintainers only support the latest version of movie-web published at https://movie-web.app.
|
||||
The p-stream maintainers only support the latest version of p-stream published at https://p-stream.app.
|
||||
This published version is equivalent to the master branch.
|
||||
|
||||
Support is not provided for any forks or mirrors of movie-web.
|
||||
Support is not provided for any forks or mirrors of p-stream.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
There are two ways you can contact the movie-web maintainers to report a vulnerability:
|
||||
- Email [security@movie-web.app](mailto:security@movie-web.app)
|
||||
- Report the vulnerability in the [movie-web Discord server](https://movie-web.github.io/links/discord)
|
||||
There are two ways you can contact the p-stream maintainers to report a vulnerability:
|
||||
- Email [security@p-stream.app](mailto:security@p-stream.app)
|
||||
- Report the vulnerability in the [p-stream Discord server](https://p-stream.github.io/links/discord)
|
||||
|
|
|
|||
6
.github/pull_request_template.md
vendored
6
.github/pull_request_template.md
vendored
|
|
@ -1,6 +1,6 @@
|
|||
This pull request resolves #XXX
|
||||
|
||||
- [ ] I have read and agreed to the [code of conduct](https://github.com/movie-web/movie-web/blob/dev/.github/CODE_OF_CONDUCT.md).
|
||||
- [ ] I have read and complied with the [contributing guidelines](https://github.com/movie-web/movie-web/blob/dev/.github/CONTRIBUTING.md).
|
||||
- [ ] What I'm implementing was assigned to me and is an [approved issue](https://github.com/movie-web/movie-web/issues?q=is%3Aopen+is%3Aissue+label%3Aapproved). For reference, please take a look at our [GitHub projects](https://github.com/movie-web/movie-web/projects).
|
||||
- [ ] I have read and agreed to the [code of conduct](https://github.com/p-stream/p-stream/blob/dev/.github/CODE_OF_CONDUCT.md).
|
||||
- [ ] I have read and complied with the [contributing guidelines](https://github.com/p-stream/p-stream/blob/dev/.github/CONTRIBUTING.md).
|
||||
- [ ] What I'm implementing was assigned to me and is an [approved issue](https://github.com/p-stream/p-stream/issues?q=is%3Aopen+is%3Aissue+label%3Aapproved). For reference, please take a look at our [GitHub projects](https://github.com/p-stream/p-stream/projects).
|
||||
- [ ] I have tested all of my changes.
|
||||
|
|
|
|||
2
LICENSE
2
LICENSE
|
|
@ -1,6 +1,6 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 movie-web
|
||||
Copyright (c) 2023 p-stream
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# @movie-web/providers
|
||||
# @p-stream/providers
|
||||
|
||||
READ: Most sources marked with 🔥 or 🤝 are only available on https://pstream.org
|
||||
|
||||
|
|
|
|||
14
package.json
14
package.json
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "@movie-web/providers",
|
||||
"name": "@p-stream/providers",
|
||||
"version": "2.3.0",
|
||||
"description": "Package that contains all the providers of movie-web",
|
||||
"description": "Package that contains all the providers of p-stream",
|
||||
"type": "module",
|
||||
"main": "./lib/index.js",
|
||||
"types": "./lib/index.d.ts",
|
||||
|
|
@ -22,18 +22,18 @@
|
|||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/movie-web/providers.git"
|
||||
"url": "git+https://github.com/p-stream/providers.git"
|
||||
},
|
||||
"keywords": [
|
||||
"movie-web",
|
||||
"p-stream",
|
||||
"providers"
|
||||
],
|
||||
"author": "movie-web",
|
||||
"author": "p-stream",
|
||||
"license": "MIT",
|
||||
"bugs": {
|
||||
"url": "https://github.com/movie-web/providers/issues"
|
||||
"url": "https://github.com/p-stream/providers/issues"
|
||||
},
|
||||
"homepage": "https://movie-web.github.io/providers/",
|
||||
"homepage": "https://p-stream.github.io/providers/",
|
||||
"scripts": {
|
||||
"build": "vite build && tsc --noEmit",
|
||||
"cli": "vite-node ./src/dev-cli/index.ts",
|
||||
|
|
|
|||
Loading…
Reference in a new issue