import requests

# Proxy configuration: replace the placeholders with a real proxy host and port.
proxies = {
    "http": "http://代理IP:端口",
    # The URL scheme describes how to reach the proxy itself, not the target
    # site. An ordinary HTTP proxy tunnels HTTPS traffic via CONNECT, so it is
    # addressed with "http://" here as well; "https://" would make the client
    # attempt a TLS handshake with the proxy.
    "https": "http://代理IP:端口",
}

# Send one request through the proxy and print the IP reported by httpbin.
try:
    res = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=5)
    print("当前请求IP:", res.text)
except Exception as e:
    # Script-level boundary: report any failure instead of showing a traceback.
    print("请求失败:", e)


import requests
import threading
import random
# Proxy pool (a large pool can be fetched automatically from a proxy provider's API).
ip_pool = [
    "http://ip1:port",
    "http://ip2:port",
    "http://ip3:port",
    "http://ip4:port",
]
# Crawler request function
def spider_task(url):
    """Fetch ``url`` through a randomly chosen proxy, retrying once on failure.

    A proxy is picked at random from the module-level ``ip_pool``. If the
    request raises ``requests.RequestException``, one more attempt is made,
    using a different proxy whenever the pool holds more than one distinct
    entry.

    Args:
        url: Address passed to ``requests.get``.

    Returns:
        None. The outcome of each attempt is reported with ``print``.

    Raises:
        IndexError: If ``ip_pool`` is empty (raised by ``random.choice``).
    """
    thread_name = threading.current_thread().name
    first_proxy = random.choice(ip_pool)
    # Rotate to a different proxy for the retry when one is available;
    # re-picking the proxy that just failed would defeat the rotation.
    others = [candidate for candidate in ip_pool if candidate != first_proxy]
    retry_proxy = random.choice(others) if others else first_proxy

    for proxy in (first_proxy, retry_proxy):
        try:
            response = requests.get(
                url,
                proxies={"http": proxy, "https": proxy},
                timeout=8,
            )
        except requests.RequestException as exc:
            # Only network/HTTP errors trigger a retry; report which proxy failed.
            print(f"线程{thread_name} 代理{proxy} 请求失败:{exc}")
            continue
        print(f"线程{thread_name} 请求成功,状态码:{response.status_code}")
        return
if __name__ == "__main__":
    target_url = "https://httpbin.org/ip"
    # Launch 10 concurrent worker threads.
    workers = [
        threading.Thread(target=spider_task, args=(target_url,))
        for _ in range(10)
    ]
    for worker in workers:
        worker.start()
    # Wait explicitly for every worker to finish before the script exits.
    for worker in workers:
        worker.join()

# Practical notes:
# - Avoid a small self-built IP pool: a locally, manually maintained pool
#   holds few addresses and easily runs into IP reuse and dead IPs. Prefer
#   integrating a commercial proxy API and fetching fresh IPs in real time.
# - Always enable a timeout: poor-quality proxies have very high latency, and
#   without the `timeout` parameter a crawler thread can hang.
# - Match nearby routes to reduce latency: when the crawler server is located
#   in mainland China, prefer nearby domestic nodes; cross-region routes
#   greatly increase request latency and lower the crawler's efficiency.

