zoukankan      html  css  js  c++  java
  • 代理IP

    API代理

    import requests
    import re
    import os
    from lxml import html  # Per the author: importing etree directly raised an error (attributed to a version issue), so it is reached through lxml.html instead
    etree = html.etree  # Alias html.etree so the rest of the script can refer to it simply as `etree`
    from string import punctuation
    # headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
    # source=requests.get('https://www.ip138.com/',headers=headers).text
    # # print(source)
    # demo=etree.HTML(source).xpath('//iframe/@src')
    # demo=''.join(demo)
    # # print(demo)
    # # print(source)
    # #获取代理IP页面
    # source=requests.get('http://piping.mogumiao.com/proxy/api/get_ip_bs?appKey=8169392b637f4f9ebeb750f51c4c612a&count=1&expiryDate=0&format=1&newLine=2').json()#将复制的东西粘贴在这
    # print(source)
    # #拼接到字典中
    # proxies={
    #     'http':source['msg'][0]['ip']+':'+source['msg'][0]['port']
    # }
    # # print(proxies)
    # print('http:'+demo)
    # headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
    # source=requests.get('http:'+demo,headers=headers).text
    # demo=etree.HTML(source).xpath('/html/body/p[1]/a/text()')
    # demo1=etree.HTML(source).xpath('/html/body/p[1]/text()[2]')
    # print(demo)
    # print(demo1)

    隧道代理

    固定部分

    import requests
    import time
    from multiprocessing import Pool
    # Credential for the Mogu tunnel-proxy order (paste your own value here).
    # It is sent verbatim after "Basic " in the Proxy-Authorization header below.
    # NOTE(review): this is a secret hard-coded in source; consider loading it
    # from an environment variable or an untracked config file instead.
    appKey = "OWxHV1kyUnVvdlJodVpqNTpHZDhSZkdJb3VWYVdueHJ6"
    # host:port of the Mogu tunnel-proxy server.
    ip_port = 'secondtransfer.moguproxy.com:9001'
    
    # The dict KEY selects which target-URL scheme the entry applies to; the
    # scheme inside the VALUE says how to talk to the proxy itself. Both values
    # use "http://": an "https://" proxy URL makes urllib3 >= 1.26 attempt a TLS
    # handshake with the proxy, which fails against a plain-HTTP proxy endpoint.
    # HTTPS targets are still tunnelled through the proxy via CONNECT.
    # NOTE(review): presumably this port is a plain-HTTP proxy — confirm with
    # the provider.
    proxy = {"http": "http://" + ip_port, "https": "http://" + ip_port}
    # Request headers: proxy credential plus a browser-like User-Agent and
    # Accept-Language.
    headers = {
        "Proxy-Authorization": 'Basic ' + appKey,
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
    }

    以下为获取页面源码

    #     source = requests.get('https://tj.fang.anjuke.com/loupan/all/p' + str(page) + '/', headers=headers, proxies=proxy,verify=False, allow_redirects=False).text  #获取页面源码
    #     demo = etree.HTML(source).xpath('//*[@id="container"]/div[2]/div[1]/div[3]/div/div/a[1]/span/text()')

    以下为多进程

    # #多进程
    # def index(page):
    #     source = requests.get('https://tj.fang.anjuke.com/loupan/all/p' + str(page) + '/', headers=headers, proxies=proxy,verify=False, allow_redirects=False).text  #获取页面源码
    #     demo = etree.HTML(source).xpath('//*[@id="container"]/div[2]/div[1]/div[3]/div/div/a[1]/span/text()')
    #     print(demo)
    #     print('=====================第'+str(page)+'页==================')
    # if __name__ == '__main__':
    #     # print('Parent process %s.' % os.getpid())
    #     p = Pool(2)#开启的进程数
    #     for page in range(1, 30):#翻页
    #         p.apply_async(index, args=(page,))
    #     print('Waiting for all subprocesses done...')
    #     p.close()
    #     p.join()
    #     print('All subprocesses done.')
  • 相关阅读:
    水平触发与边缘触发
    Leetcode935 骑士拨号器
    leetcodeF47 礼物的最大价值
    leetcodeF42 连续子数组的最大和
    leetcode12 矩阵中的路径 回溯算法
    leetcode14-II 剪绳子II DP 解法配合快速乘取模
    leetcode17.16 按摩师DP
    leetcode530 二叉树的最小绝对差
    PCB genesis大孔加小孔(即卸力孔)实现方法
    PCB genesis短槽加引导孔实现方法
  • 原文地址:https://www.cnblogs.com/wbf980728/p/14331559.html
Copyright © 2011-2022 走看看