zoukankan      html  css  js  c++  java
  • 代理IP

    API代理

    import requests
    import re
    import os
    from lxml import html  # importing etree directly raised an error here (the author attributes it to a version issue), so it is obtained a different way
    etree = html.etree  # bind the etree module through lxml.html
    from string import punctuation
    # headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
    # source=requests.get('https://www.ip138.com/',headers=headers).text
    # # print(source)
    # demo=etree.HTML(source).xpath('//iframe/@src')
    # demo=''.join(demo)
    # # print(demo)
    # # print(source)
    # #获取代理IP页面
    # source=requests.get('http://piping.mogumiao.com/proxy/api/get_ip_bs?appKey=8169392b637f4f9ebeb750f51c4c612a&count=1&expiryDate=0&format=1&newLine=2').json()#将复制的东西粘贴在这
    # print(source)
    # #拼接到字典中
    # proxies={
    #     'http':source['msg'][0]['ip']+':'+source['msg'][0]['port']
    # }
    # # print(proxies)
    # print('http:'+demo)
    # headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
    # source=requests.get('http:'+demo,headers=headers).text
    # demo=etree.HTML(source).xpath('/html/body/p[1]/a/text()')
    # demo1=etree.HTML(source).xpath('/html/body/p[1]/text()[2]')
    # print(demo)
    # print(demo1)

    隧道代理

    固定部分

    import requests
    import time
    from multiprocessing import Pool
    # Mogu proxy tunnel-order key (pasted in by hand).
    # NOTE(review): this credential is hard-coded in the source; consider
    # loading it from configuration or the environment instead.
    appKey = "OWxHV1kyUnVvdlJodVpqNTpHZDhSZkdJb3VWYVdueHJ6"
    # Address (host:port) of the Mogu tunnel proxy server.
    ip_port = 'secondtransfer.moguproxy.com:9001'

    # Proxy mapping: one entry per URL scheme, each pointing at the tunnel
    # server with that same scheme as the proxy URL prefix.
    # NOTE(review): an "https://" proxy URL may make recent requests/urllib3
    # releases open a TLS connection to the proxy itself -- confirm that the
    # server expects that.
    proxy = {scheme: scheme + "://" + ip_port for scheme in ("http", "https")}

    # Headers sent with every request: the tunnel key as a Basic
    # Proxy-Authorization value, a fixed User-Agent string, and the
    # accepted languages.
    headers = dict([
        ("Proxy-Authorization", 'Basic ' + appKey),
        ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0"),
        ("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4"),
    ])

    以下为获取页面源码

    #     source = requests.get('https://tj.fang.anjuke.com/loupan/all/p' + str(page) + '/', headers=headers, proxies=proxy,verify=False, allow_redirects=False).text  #获取页面源码
    #     demo = etree.HTML(source).xpath('//*[@id="container"]/div[2]/div[1]/div[3]/div/div/a[1]/span/text()')

    以下为多进程(使用 multiprocessing.Pool)

    # #多进程
    # def index(page):
    #     source = requests.get('https://tj.fang.anjuke.com/loupan/all/p' + str(page) + '/', headers=headers, proxies=proxy,verify=False, allow_redirects=False).text  #获取页面源码
    #     demo = etree.HTML(source).xpath('//*[@id="container"]/div[2]/div[1]/div[3]/div/div/a[1]/span/text()')
    #     print(demo)
    #     print('=====================第'+str(page)+'页==================')
    # if __name__ == '__main__':
    #     # print('Parent process %s.' % os.getpid())
    #     p = Pool(2)#开启的进程数
    #     for page in range(1, 30):#翻页
    #         p.apply_async(index, args=(page,))
    #     print('Waiting for all subprocesses done...')
    #     p.close()
    #     p.join()
    #     print('All subprocesses done.')
  • 相关阅读:
    configure new Linux/Mac
    Python input()和raw_input()的区别
    python json读取txt文本 解析str 出错No JSON object could be decoded
    python TypeError: 'str' object does not support item assignment”
    python中对字典按照value排序
    腾讯实习面试被虐记
    软件里的实践出真知
    c链表实现遇到的错误
    Linux命令(2)
    yum的使用及配置
  • 原文地址:https://www.cnblogs.com/wbf980728/p/14331559.html
Copyright © 2011-2022 走看看