跳转到内容

node爬虫实践-axios

请求QPS限制

场景:服务器对请求频率有限制

要求:相邻两次请求的发出时刻之间的间隔不能小于 minInterval(单位:毫秒);该间隔只按请求发出的时刻计算,不包含 http 请求耗时和响应耗时

ts
import axios from 'axios';
import { createRequestThrottle } from './create-request-throttle.ts';

// Dedicated axios instance whose outgoing requests are throttled.
const http = axios.create({
    // ...other config
});

// Request interceptor that keeps at least 1500 ms between consecutive request releases.
const throttleInterceptor = createRequestThrottle(1500);
http.interceptors.request.use(throttleInterceptor);
ts
import type { InternalAxiosRequestConfig } from 'axios';
import { setTimeout } from 'node:timers/promises';

/**
 * Builds a simple sequential throttle that enforces a minimum gap between
 * consecutive request releases.
 *
 * Calls are queued on an internal promise chain, so configs are released
 * strictly in the order they were submitted. The gap is measured between
 * release timestamps only.
 *
 * @param minInterval - Minimum time between two releases, in milliseconds.
 * @returns A function suitable for `http.interceptors.request.use`; it resolves
 *          with the unchanged config once the request is allowed to proceed.
 */
export function createRequestThrottle(minInterval: number) {
    // Tail of the queue: settles once the most recently queued request was released.
    let tail: Promise<void> = Promise.resolve();
    // `Date.now()` timestamp of the latest release; 0 means nothing was released yet.
    let lastReleasedAt = 0;

    // Handler that maps any settlement of a queued release to `undefined`.
    const settle = () => undefined;

    // Sleeps for whatever is left of the interval, then records the release time.
    async function waitForSlot() {
        if (lastReleasedAt > 0) {
            const waitMs = minInterval - (Date.now() - lastReleasedAt);
            if (waitMs > 0) {
                await setTimeout(waitMs);
            }
        }

        lastReleasedAt = Date.now();
    }

    return function (config: InternalAxiosRequestConfig) {
        const release = async () => {
            await waitForSlot();
            return config;
        };

        // Start only after the previous entry has settled, whichever way it settled.
        const pending = tail.then(release, release);
        // The queue itself never rejects, so later entries are not affected by earlier ones.
        tail = pending.then(settle, settle);
        return pending;
    };
}

http 代理

场景描述

服务器对访问来源 IP 有限制,因此需要通过 http 代理转发请求

ts
import axios from 'axios';
import { HttpProxyAgent } from 'http-proxy-agent';
import { HttpsProxyAgent } from 'https-proxy-agent';

// Proxy endpoint (credentials embedded in the URL) used by both agents.
const proxyUrl = 'http://lcadmin:lcadmin@127.0.0.1:7890';

/**
 * Axios instance configured with proxy agents for both HTTP and HTTPS requests.
 */
export const http = axios.create({
    // Disable axios' own `proxy` option; the agents below are built with the proxy URL instead.
    proxy: false,
    httpsAgent: new HttpsProxyAgent(proxyUrl),
    httpAgent: new HttpProxyAgent(proxyUrl),
    // ...other config
});

最后更新于: