add scraping timeout

This commit is contained in:
Pas 2025-04-06 23:39:49 -06:00
parent 4a371321cb
commit cf7c449d54
7 changed files with 104 additions and 31 deletions

View file

@ -83,6 +83,7 @@
"vitest": "^1.6.0"
},
"dependencies": {
"abort-controller": "^3.0.0",
"cheerio": "1.0.0-rc.12",
"cookie": "^0.6.0",
"crypto-js": "^4.2.0",

View file

@ -8,6 +8,9 @@ importers:
.:
dependencies:
abort-controller:
specifier: ^3.0.0
version: 3.0.0
cheerio:
specifier: 1.0.0-rc.12
version: 1.0.0-rc.12
@ -686,6 +689,10 @@ packages:
'@vue/shared@3.5.13':
resolution: {integrity: sha512-/hnE/qP5ZoGpol0a5mDi45bOd7t3tjYJBjsgCsivow7D48cJeV5l05RD82lPqi7gRiphZM37rnhW1l6ZoCNNnQ==}
abort-controller@3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
acorn-jsx@5.3.2:
resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
peerDependencies:
@ -1258,6 +1265,10 @@ packages:
resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
engines: {node: '>=0.10.0'}
event-target-shim@5.0.1:
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
engines: {node: '>=6'}
execa@8.0.1:
resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==}
engines: {node: '>=16.17'}
@ -3111,6 +3122,10 @@ snapshots:
'@vue/shared@3.5.13': {}
abort-controller@3.0.0:
dependencies:
event-target-shim: 5.0.1
acorn-jsx@5.3.2(acorn@8.14.0):
dependencies:
acorn: 8.14.0
@ -3834,6 +3849,8 @@ snapshots:
esutils@2.0.3: {}
event-target-shim@5.0.1: {}
execa@8.0.1:
dependencies:
cross-spawn: 7.0.6

View file

@ -1,6 +1,7 @@
import { makeSimpleProxyFetcher } from '@/fetchers/simpleProxy';
import { DefaultedFetcherOptions, FetcherOptions } from '@/fetchers/types';
import { Headers } from 'node-fetch';
import { AbortController } from 'abort-controller';
import { afterEach, describe, expect, it, vi } from 'vitest';
describe('makeSimpleProxyFetcher()', () => {
@ -50,6 +51,8 @@ describe('makeSimpleProxyFetcher()', () => {
);
expect((async () => (await prom).statusCode)()).resolves.toEqual(204);
expect((async () => (await prom).finalUrl)()).resolves.toEqual('test123');
const controller = new AbortController();
ops.output.signal = controller.signal;
expect(fetch).toBeCalledWith(ops.outputUrl ?? ops.inputUrl, ops.output);
vi.clearAllMocks();
}
@ -72,6 +75,7 @@ describe('makeSimpleProxyFetcher()', () => {
headers: {
'X-Hello': 'world',
},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -90,6 +94,7 @@ describe('makeSimpleProxyFetcher()', () => {
output: {
method: 'GET',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -106,6 +111,7 @@ describe('makeSimpleProxyFetcher()', () => {
output: {
method: 'GET',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -125,6 +131,7 @@ describe('makeSimpleProxyFetcher()', () => {
output: {
method: 'POST',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -141,6 +148,7 @@ describe('makeSimpleProxyFetcher()', () => {
output: {
method: 'POST',
headers: {},
signal: new AbortController().signal,
},
outputBody: { hello: 42 },
});

View file

@ -1,6 +1,7 @@
import { makeStandardFetcher } from '@/fetchers/standardFetch';
import { DefaultedFetcherOptions } from '@/fetchers/types';
import { Headers } from 'node-fetch';
import { AbortController } from 'abort-controller';
import { afterEach, describe, expect, it, vi } from 'vitest';
describe('makeStandardFetcher()', () => {
@ -50,6 +51,8 @@ describe('makeStandardFetcher()', () => {
);
expect((async () => (await prom).statusCode)()).resolves.toEqual(204);
expect((async () => (await prom).finalUrl)()).resolves.toEqual('test123');
const controller = new AbortController();
ops.output.signal = controller.signal;
expect(fetch).toBeCalledWith(ops.outputUrl ?? ops.inputUrl, ops.output);
vi.clearAllMocks();
}
@ -72,7 +75,7 @@ describe('makeStandardFetcher()', () => {
headers: {
'X-Hello': 'world',
},
body: undefined,
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -91,6 +94,7 @@ describe('makeStandardFetcher()', () => {
output: {
method: 'GET',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -107,6 +111,7 @@ describe('makeStandardFetcher()', () => {
output: {
method: 'GET',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -126,6 +131,7 @@ describe('makeStandardFetcher()', () => {
output: {
method: 'POST',
headers: {},
signal: new AbortController().signal,
},
outputBody: 'hello world',
});
@ -142,6 +148,7 @@ describe('makeStandardFetcher()', () => {
output: {
method: 'POST',
headers: {},
signal: new AbortController().signal,
},
outputBody: { hello: 42 },
});

View file

@ -8,6 +8,7 @@ export type FetchOps = {
method: string;
body: any;
credentials?: 'include' | 'same-origin' | 'omit';
signal?: any;
};
export type FetchHeaders = {

View file

@ -1,3 +1,5 @@
import AbortController from 'abort-controller';
import { makeFullUrl } from '@/fetchers/common';
import { FetchLike } from '@/fetchers/fetch';
import { makeStandardFetcher } from '@/fetchers/standardFetch';
@ -18,19 +20,39 @@ const responseHeaderMap: Record<string, string> = {
export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher {
const proxiedFetch: Fetcher = async (url, ops) => {
const fetcher = makeStandardFetcher(async (a, b) => {
const res = await f(a, b);
// Abort the request through an AbortController if it exceeds the timeout
const controller = new AbortController();
const timeout = 10000; // 10s timeout
const timeoutId = setTimeout(() => controller.abort(), timeout);
// set extra headers that can't normally be accessed
res.extraHeaders = new Headers();
Object.entries(responseHeaderMap).forEach((entry) => {
const value = res.headers.get(entry[0]);
if (!value) return;
res.extraHeaders?.set(entry[1].toLowerCase(), value);
});
try {
const res = await f(a, {
method: b?.method || 'GET',
headers: b?.headers || {},
body: b?.body,
credentials: b?.credentials,
signal: controller.signal, // Pass the signal to fetch
});
// set correct final url
res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
return res;
clearTimeout(timeoutId);
// set extra headers that can't normally be accessed
res.extraHeaders = new Headers();
Object.entries(responseHeaderMap).forEach((entry) => {
const value = res.headers.get(entry[0]);
if (!value) return;
res.extraHeaders?.set(entry[1].toLowerCase(), value);
});
// set correct final url
res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
return res;
} catch (error: any) {
if (error.name === 'AbortError') {
throw new Error(`Fetch request to ${a} timed out after ${timeout}ms`);
}
throw error;
}
});
const fullUrl = makeFullUrl(url, ops);

View file

@ -1,3 +1,5 @@
import AbortController from 'abort-controller';
import { serializeBody } from '@/fetchers/body';
import { makeFullUrl } from '@/fetchers/common';
import { FetchLike, FetchReply } from '@/fetchers/fetch';
@ -21,27 +23,42 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
const fullUrl = makeFullUrl(url, ops);
const seralizedBody = serializeBody(ops.body);
const res = await f(fullUrl, {
method: ops.method,
headers: {
...seralizedBody.headers,
...ops.headers,
},
body: seralizedBody.body,
credentials: ops.credentials,
});
// Abort the request through an AbortController if it exceeds the timeout
const controller = new AbortController();
const timeout = 10000; // 10s timeout
const timeoutId = setTimeout(() => controller.abort(), timeout);
let body: any;
const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) body = await res.json();
else body = await res.text();
try {
const res = await f(fullUrl, {
method: ops.method,
headers: {
...seralizedBody.headers,
...ops.headers,
},
body: seralizedBody.body,
credentials: ops.credentials,
signal: controller.signal, // Pass the signal to fetch
});
return {
body,
finalUrl: res.extraUrl ?? res.url,
headers: getHeaders(ops.readHeaders, res),
statusCode: res.status,
};
clearTimeout(timeoutId);
let body: any;
const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) body = await res.json();
else body = await res.text();
return {
body,
finalUrl: res.extraUrl ?? res.url,
headers: getHeaders(ops.readHeaders, res),
statusCode: res.status,
};
} catch (error: any) {
if (error.name === 'AbortError') {
throw new Error(`Fetch request to ${fullUrl} timed out after ${timeout}ms`);
}
throw error;
}
};
return normalFetch;