node爬虫实践-axios
请求QPS限制
场景:服务器对请求频率有限制
要求:相邻两次请求的发起间隔不能小于 minInterval(毫秒);间隔只按请求的发起时刻计算,不包括 http 请求时间和响应时间
ts
import axios from 'axios';
import { createRequestThrottle } from './create-request-throttle.ts';
// Dedicated axios instance; the interceptor registered below applies to
// requests made through this instance.
const http = axios.create({
  // ...other config
});

// Register the throttle as a request interceptor. 1500 is the `minInterval`
// argument, which createRequestThrottle documents as milliseconds.
http.interceptors.request.use(createRequestThrottle(1500));
ts
import type { InternalAxiosRequestConfig } from 'axios';
import { setTimeout } from 'node:timers/promises';
/**
 * Creates a simple sequential throttle that enforces a minimum interval
 * between tasks.
 *
 * Calls are queued in arrival order. Each call resolves with the very same
 * `config` object it received once at least `minInterval` milliseconds have
 * passed since the previous call was released. Only the release moments are
 * spaced out; the throttle never waits for the HTTP request or its response.
 *
 * Elapsed time is measured with the monotonic `performance.now()` clock, so
 * wall-clock adjustments can neither stall the queue nor shorten the interval.
 *
 * @param minInterval - Minimum spacing between two releases, in milliseconds.
 *   Zero, negative and `NaN` values result in no delay at all.
 * @returns A throttle function intended for `http.interceptors.request.use`.
 */
export function createRequestThrottle(
  minInterval: number,
): (config: InternalAxiosRequestConfig) => Promise<InternalAxiosRequestConfig> {
  // Node.js timers clamp delays above this value to 1 ms, so longer waits are
  // split into several sleeps instead of being silently shortened.
  const maxTimerDelay = 2 ** 31 - 1;

  // Tail of the queue: every new call waits for the previous one to be released.
  let queueTail: Promise<void> = Promise.resolve();
  // Monotonic timestamp of the latest release; undefined until the first one.
  let lastReleasedAt: number | undefined;

  /** Sleeps until `minInterval` has elapsed since the last release, then stamps a new one. */
  async function waitForTurn(): Promise<void> {
    if (lastReleasedAt !== undefined) {
      const readyAt = lastReleasedAt + minInterval;
      let remaining = readyAt - performance.now();
      // Re-check after every sleep: timer delays are whole milliseconds and are
      // not guaranteed to be exact, so one sleep may end a little too early.
      while (remaining > 0) {
        await setTimeout(Math.min(Math.ceil(remaining), maxTimerDelay));
        remaining = readyAt - performance.now();
      }
    }
    lastReleasedAt = performance.now();
  }

  return function throttleRequest(
    config: InternalAxiosRequestConfig,
  ): Promise<InternalAxiosRequestConfig> {
    const turn = queueTail.then(waitForTurn);
    // Keep the queue usable even if a turn were ever to reject.
    queueTail = turn.then(
      () => undefined,
      () => undefined,
    );
    return turn.then(() => config);
  };
}
// http 代理 (HTTP proxy)
场景描述
服务器对网络IP有限制
ts
import axios from 'axios';
import { HttpProxyAgent } from 'http-proxy-agent';
import { HttpsProxyAgent } from 'https-proxy-agent';
// One definition of the proxy endpoint so both agents always share the same URL.
// NOTE(review): credentials are embedded in this literal — consider loading
// them from configuration instead.
const PROXY_URL = 'http://lcadmin:lcadmin@127.0.0.1:7890';

/**
 * Shared axios instance that is given an HTTPS and an HTTP proxy agent, both
 * created from PROXY_URL.
 */
export const http = axios.create({
  // axios's own `proxy` option is switched off; the agents below are supplied instead.
  proxy: false,
  httpsAgent: new HttpsProxyAgent(PROXY_URL),
  httpAgent: new HttpProxyAgent(PROXY_URL),
  // ...other config
});