mirror of
https://github.com/p-stream/providers.git
synced 2026-01-11 20:10:33 +00:00
add scraping timeout
This commit is contained in:
parent
4a371321cb
commit
cf7c449d54
7 changed files with 104 additions and 31 deletions
|
|
@ -83,6 +83,7 @@
|
|||
"vitest": "^1.6.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"abort-controller": "^3.0.0",
|
||||
"cheerio": "1.0.0-rc.12",
|
||||
"cookie": "^0.6.0",
|
||||
"crypto-js": "^4.2.0",
|
||||
|
|
|
|||
|
|
@ -8,6 +8,9 @@ importers:
|
|||
|
||||
.:
|
||||
dependencies:
|
||||
abort-controller:
|
||||
specifier: ^3.0.0
|
||||
version: 3.0.0
|
||||
cheerio:
|
||||
specifier: 1.0.0-rc.12
|
||||
version: 1.0.0-rc.12
|
||||
|
|
@ -686,6 +689,10 @@ packages:
|
|||
'@vue/shared@3.5.13':
|
||||
resolution: {integrity: sha512-/hnE/qP5ZoGpol0a5mDi45bOd7t3tjYJBjsgCsivow7D48cJeV5l05RD82lPqi7gRiphZM37rnhW1l6ZoCNNnQ==}
|
||||
|
||||
abort-controller@3.0.0:
|
||||
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
|
||||
engines: {node: '>=6.5'}
|
||||
|
||||
acorn-jsx@5.3.2:
|
||||
resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
|
||||
peerDependencies:
|
||||
|
|
@ -1258,6 +1265,10 @@ packages:
|
|||
resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
event-target-shim@5.0.1:
|
||||
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
|
||||
engines: {node: '>=6'}
|
||||
|
||||
execa@8.0.1:
|
||||
resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==}
|
||||
engines: {node: '>=16.17'}
|
||||
|
|
@ -3111,6 +3122,10 @@ snapshots:
|
|||
|
||||
'@vue/shared@3.5.13': {}
|
||||
|
||||
abort-controller@3.0.0:
|
||||
dependencies:
|
||||
event-target-shim: 5.0.1
|
||||
|
||||
acorn-jsx@5.3.2(acorn@8.14.0):
|
||||
dependencies:
|
||||
acorn: 8.14.0
|
||||
|
|
@ -3834,6 +3849,8 @@ snapshots:
|
|||
|
||||
esutils@2.0.3: {}
|
||||
|
||||
event-target-shim@5.0.1: {}
|
||||
|
||||
execa@8.0.1:
|
||||
dependencies:
|
||||
cross-spawn: 7.0.6
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { makeSimpleProxyFetcher } from '@/fetchers/simpleProxy';
|
||||
import { DefaultedFetcherOptions, FetcherOptions } from '@/fetchers/types';
|
||||
import { Headers } from 'node-fetch';
|
||||
import { AbortController } from 'abort-controller';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
describe('makeSimpleProxyFetcher()', () => {
|
||||
|
|
@ -50,6 +51,8 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
);
|
||||
expect((async () => (await prom).statusCode)()).resolves.toEqual(204);
|
||||
expect((async () => (await prom).finalUrl)()).resolves.toEqual('test123');
|
||||
const controller = new AbortController();
|
||||
ops.output.signal = controller.signal;
|
||||
expect(fetch).toBeCalledWith(ops.outputUrl ?? ops.inputUrl, ops.output);
|
||||
vi.clearAllMocks();
|
||||
}
|
||||
|
|
@ -72,6 +75,7 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
headers: {
|
||||
'X-Hello': 'world',
|
||||
},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -90,6 +94,7 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
output: {
|
||||
method: 'GET',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -106,6 +111,7 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
output: {
|
||||
method: 'GET',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -125,6 +131,7 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
output: {
|
||||
method: 'POST',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -141,6 +148,7 @@ describe('makeSimpleProxyFetcher()', () => {
|
|||
output: {
|
||||
method: 'POST',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: { hello: 42 },
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { makeStandardFetcher } from '@/fetchers/standardFetch';
|
||||
import { DefaultedFetcherOptions } from '@/fetchers/types';
|
||||
import { Headers } from 'node-fetch';
|
||||
import { AbortController } from 'abort-controller';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
describe('makeStandardFetcher()', () => {
|
||||
|
|
@ -50,6 +51,8 @@ describe('makeStandardFetcher()', () => {
|
|||
);
|
||||
expect((async () => (await prom).statusCode)()).resolves.toEqual(204);
|
||||
expect((async () => (await prom).finalUrl)()).resolves.toEqual('test123');
|
||||
const controller = new AbortController();
|
||||
ops.output.signal = controller.signal;
|
||||
expect(fetch).toBeCalledWith(ops.outputUrl ?? ops.inputUrl, ops.output);
|
||||
vi.clearAllMocks();
|
||||
}
|
||||
|
|
@ -72,7 +75,7 @@ describe('makeStandardFetcher()', () => {
|
|||
headers: {
|
||||
'X-Hello': 'world',
|
||||
},
|
||||
body: undefined,
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -91,6 +94,7 @@ describe('makeStandardFetcher()', () => {
|
|||
output: {
|
||||
method: 'GET',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -107,6 +111,7 @@ describe('makeStandardFetcher()', () => {
|
|||
output: {
|
||||
method: 'GET',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -126,6 +131,7 @@ describe('makeStandardFetcher()', () => {
|
|||
output: {
|
||||
method: 'POST',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: 'hello world',
|
||||
});
|
||||
|
|
@ -142,6 +148,7 @@ describe('makeStandardFetcher()', () => {
|
|||
output: {
|
||||
method: 'POST',
|
||||
headers: {},
|
||||
signal: new AbortController().signal,
|
||||
},
|
||||
outputBody: { hello: 42 },
|
||||
});
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ export type FetchOps = {
|
|||
method: string;
|
||||
body: any;
|
||||
credentials?: 'include' | 'same-origin' | 'omit';
|
||||
signal?: any;
|
||||
};
|
||||
|
||||
export type FetchHeaders = {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import AbortController from 'abort-controller';
|
||||
|
||||
import { makeFullUrl } from '@/fetchers/common';
|
||||
import { FetchLike } from '@/fetchers/fetch';
|
||||
import { makeStandardFetcher } from '@/fetchers/standardFetch';
|
||||
|
|
@ -18,19 +20,39 @@ const responseHeaderMap: Record<string, string> = {
|
|||
export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher {
|
||||
const proxiedFetch: Fetcher = async (url, ops) => {
|
||||
const fetcher = makeStandardFetcher(async (a, b) => {
|
||||
const res = await f(a, b);
|
||||
// AbortController
|
||||
const controller = new AbortController();
|
||||
const timeout = 10000; // 10s timeout
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||
|
||||
// set extra headers that cant normally be accessed
|
||||
res.extraHeaders = new Headers();
|
||||
Object.entries(responseHeaderMap).forEach((entry) => {
|
||||
const value = res.headers.get(entry[0]);
|
||||
if (!value) return;
|
||||
res.extraHeaders?.set(entry[1].toLowerCase(), value);
|
||||
});
|
||||
try {
|
||||
const res = await f(a, {
|
||||
method: b?.method || 'GET',
|
||||
headers: b?.headers || {},
|
||||
body: b?.body,
|
||||
credentials: b?.credentials,
|
||||
signal: controller.signal, // Pass the signal to fetch
|
||||
});
|
||||
|
||||
// set correct final url
|
||||
res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
|
||||
return res;
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
// set extra headers that cant normally be accessed
|
||||
res.extraHeaders = new Headers();
|
||||
Object.entries(responseHeaderMap).forEach((entry) => {
|
||||
const value = res.headers.get(entry[0]);
|
||||
if (!value) return;
|
||||
res.extraHeaders?.set(entry[1].toLowerCase(), value);
|
||||
});
|
||||
|
||||
// set correct final url
|
||||
res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
|
||||
return res;
|
||||
} catch (error: any) {
|
||||
if (error.name === 'AbortError') {
|
||||
throw new Error(`Fetch request to ${a} timed out after ${timeout}ms`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
});
|
||||
|
||||
const fullUrl = makeFullUrl(url, ops);
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import AbortController from 'abort-controller';
|
||||
|
||||
import { serializeBody } from '@/fetchers/body';
|
||||
import { makeFullUrl } from '@/fetchers/common';
|
||||
import { FetchLike, FetchReply } from '@/fetchers/fetch';
|
||||
|
|
@ -21,27 +23,42 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
|
|||
const fullUrl = makeFullUrl(url, ops);
|
||||
const seralizedBody = serializeBody(ops.body);
|
||||
|
||||
const res = await f(fullUrl, {
|
||||
method: ops.method,
|
||||
headers: {
|
||||
...seralizedBody.headers,
|
||||
...ops.headers,
|
||||
},
|
||||
body: seralizedBody.body,
|
||||
credentials: ops.credentials,
|
||||
});
|
||||
// AbortController
|
||||
const controller = new AbortController();
|
||||
const timeout = 10000; // 10s timeout
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||
|
||||
let body: any;
|
||||
const isJson = res.headers.get('content-type')?.includes('application/json');
|
||||
if (isJson) body = await res.json();
|
||||
else body = await res.text();
|
||||
try {
|
||||
const res = await f(fullUrl, {
|
||||
method: ops.method,
|
||||
headers: {
|
||||
...seralizedBody.headers,
|
||||
...ops.headers,
|
||||
},
|
||||
body: seralizedBody.body,
|
||||
credentials: ops.credentials,
|
||||
signal: controller.signal, // Pass the signal to fetch
|
||||
});
|
||||
|
||||
return {
|
||||
body,
|
||||
finalUrl: res.extraUrl ?? res.url,
|
||||
headers: getHeaders(ops.readHeaders, res),
|
||||
statusCode: res.status,
|
||||
};
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
let body: any;
|
||||
const isJson = res.headers.get('content-type')?.includes('application/json');
|
||||
if (isJson) body = await res.json();
|
||||
else body = await res.text();
|
||||
|
||||
return {
|
||||
body,
|
||||
finalUrl: res.extraUrl ?? res.url,
|
||||
headers: getHeaders(ops.readHeaders, res),
|
||||
statusCode: res.status,
|
||||
};
|
||||
} catch (error: any) {
|
||||
if (error.name === 'AbortError') {
|
||||
throw new Error(`Fetch request to ${fullUrl} timed out after ${timeout}ms`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
return normalFetch;
|
||||
|
|
|
|||
Loading…
Reference in a new issue