python代码中如何使用HTTP代理

华益云HTTP爬虫IP 2022-09-13 1413

电子说

1.3w人已加入

描述

一、什么是HTTP代理。

HTTP代理就是介于浏览器和web服务器之间的一台服务器，连接代理后，浏览器不再直接向web服务器取回网页，而是向代理服务器发出request信号，代理服务器再向web服务器发出请求，收到web服务器返回的数据后再反馈给浏览器。

二、华益云HTTP代理IP基本参数介绍

1：基本参数介绍：支持http，https，socks5等多种协议类型。100%高匿IP，支持API提取，API最快提取间隔1秒，一次最多可以提取200个。

2：支持的语言：python，java，c，c#，go，php，易语言等多种编程语言。

3：使用范围：软件程序API调用，爬虫，网页访问，浏览等支持HTTP协议批量调用的应用。

三、python代码中如何使用HTTP代理。

# 此版本无需安装依赖
import urllib
import urllib.request
import urllib

def main():
    """Download one page through a proxy taken from the proxy-list API.

    Fetches a list of proxies with ``getProxy``, then makes up to three
    attempts to request the target URL with ``requestGet``, using a fresh
    proxy from the front of the list on every attempt.  Whatever was
    downloaded (or ``None`` when every attempt failed) is printed.
    Returns early, printing nothing, when the initial proxy list is empty.
    """
    # User-Agent header sent with every request.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/532.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
    # Proxy-list API endpoint; the username/pwd values here are masked
    # placeholders and presumably need replacing with real credentials.
    proxy_api = "http://http.9vps.com/getip.asp?username=166xxxx6597&pwd=xxxxbaa59ce237dff65134984b9cxxxx&geshi=1&fenge=1&fengefu=&Contenttype=1&getnum=20&setcity=&operate=all&"

    proxy_pool = getProxy(proxy_api, user_agent)
    if not proxy_pool:
        # Nothing to work with: no proxy could be obtained.
        return

    # Page to download (https://httpbin.org/get is a handy alternative).
    target = "https://www.qq.com/"
    page = None

    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            # Take the next proxy from the front of the list and use it for
            # both plain and TLS traffic.
            address = proxy_pool.pop(0)
            page = requestGet(
                target, user_agent, dict.fromkeys(("http", "https"), address)
            )
        except Exception as err:
            print(err)
            if not proxy_pool:
                # The list ran dry; ask the API for a new batch.
                proxy_pool = getProxy(proxy_api, user_agent)
        else:
            # Success: stop retrying.
            break

    print(page)
def getProxy(proxyUrl, userAgent):
    """Download the proxy list from the proxy-extraction API.

    The response body is expected to be plain text with one proxy per line.
    A body containing ``{`` is treated as an error payload and yields no
    proxies.

    Args:
        proxyUrl: URL of the proxy-list API.
        userAgent: Value for the ``User-Agent`` request header.

    Returns:
        A list of proxy strings with surrounding whitespace removed and
        blank lines dropped.  The list is empty when the request fails or
        the API reports an error; failures are printed, never raised.
    """
    outPutProxy = []
    try:
        proxyIps = requestGet(proxyUrl, userAgent, None)
        print("(proxyIps)", proxyIps)
        if "{" in proxyIps:
            # Error payloads look like:
            # {"code":3002,"data":[],"msg":"error!...","success":false}
            # (that sample message reports a wrong username or password).
            print("[错误]" + proxyIps)
        else:
            # Strip each line and skip blank ones so that stray whitespace
            # or empty lines in the response never become bogus proxies.
            outPutProxy = [
                entry for entry in map(str.strip, proxyIps.splitlines()) if entry
            ]
    except Exception as e:
        # Best effort: report the problem and fall through with an empty list.
        print(e)
    print("总共获取了"+str(len(outPutProxy))+"个代理")
    return outPutProxy

def requestGet(url, userAgent, proxy):
    """Fetch ``url`` with a GET request and return the body as text.

    Args:
        url: Address to request.
        userAgent: Value for the ``User-Agent`` request header.
        proxy: Mapping of URL scheme to proxy address, in the form accepted
            by ``urllib.request.ProxyHandler`` (for example
            ``{"http": "host:port", "https": "host:port"}``), or a falsy
            value to use urllib's default opener.

    Returns:
        The response body decoded as UTF-8; bytes that cannot be decoded
        are silently dropped.

    Raises:
        Whatever urllib raises for a failed request (for example
        ``urllib.error.URLError``); the request uses a 5 second timeout.
    """
    request = urllib.request.Request(url, headers={"User-Agent": userAgent})

    if proxy:
        # Use the proxy opener for this call only.  Installing it globally
        # with install_opener() would make later proxy-less calls go through
        # this (possibly dead) proxy as well.
        opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
        open_url = opener.open
    else:
        # No proxy requested: go through urllib's default opener.
        open_url = urllib.request.urlopen

    # The context manager closes the connection even if read() fails.
    with open_url(request, timeout=5) as response:
        html = response.read()

    # Decode leniently so a few bad bytes do not abort the whole download.
    return html.decode("utf-8", "ignore")

if __name__ == "__main__":
    # Run the demo only when executed as a script, so that importing this
    # module does not fire network requests as a side effect.
    main()

打开APP阅读更多精彩内容