zoukankan      html  css  js  c++  java
  • python爬西刺代理

    爬IP代码

    import requests
    import re
    import  dauk
    from bs4 import BeautifulSoup
    import time
    def daili(pages=99):
        """Scrape proxy IPs from the xicidaili ``/nt/`` listing pages.

        The text of every ``<td>`` cell on each page is scanned for dotted-quad
        IPv4 addresses. Each address is printed and appended, one per line, to
        ``采集到的IP.txt`` in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP,默认为99页')
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
        # The digit class and the dot separators must be escaped; without the
        # backslashes the pattern looks for a literal "d" and any character.
        octet = r'(25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
        ip_pattern = re.compile(r'\.'.join([octet] * 4))
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/nt/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            soup = BeautifulSoup(r.content, 'html.parser')
            found = []
            for cell in soup.find_all('td'):
                # findall() yields one 4-tuple of octets per address.
                found.extend(".".join(octets) for octets in ip_pattern.findall(cell.get_text()))
            if not found:
                continue
            # One append per page instead of reopening the file per address.
            with open('采集到的IP.txt', 'a', encoding='utf-8') as out:
                for ip in found:
                    print(ip)
                    out.write(ip + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        daili()
    
    
    def dailigaoni(pages=99):
        """Scrape proxy IPs from the xicidaili ``/nn/`` listing pages.

        The decoded HTML of each page is scanned for dotted-quad IPv4
        addresses. Each address is printed and appended, one per line, to
        ``采集到的IP.txt`` in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP,默认为99页')
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The digit class and the dot separators must be escaped; without the
        # backslashes the pattern looks for a literal "d" and any character.
        octet = r'(25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
        ip_pattern = re.compile(r'\.'.join([octet] * 4))
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/nn/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # Search the decoded text rather than the repr of the raw bytes.
            found = [".".join(octets) for octets in ip_pattern.findall(r.text)]
            if not found:
                continue
            with open('采集到的IP.txt', 'a', encoding='utf-8') as out:
                for ip in found:
                    print(ip)
                    out.write(ip + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailigaoni()
    
    def dailihtp(pages=99):
        """Scrape proxy IPs from the xicidaili ``/wn/`` listing pages.

        The decoded HTML of each page is scanned for dotted-quad IPv4
        addresses. Each address is printed and appended, one per line, to
        ``采集到的IP.txt`` in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP,默认为99页')
        # requests expects a mapping for `headers`; the original passed the
        # dict wrapped in quotes, i.e. a plain string.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The digit class and the dot separators must be escaped; without the
        # backslashes the pattern looks for a literal "d" and any character.
        octet = r'(25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
        ip_pattern = re.compile(r'\.'.join([octet] * 4))
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/wn/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # A str pattern cannot be applied to the bytes in r.content, so
            # search the decoded text instead.
            found = [".".join(octets) for octets in ip_pattern.findall(r.text)]
            if not found:
                continue
            with open('采集到的IP.txt', 'a', encoding='utf-8') as out:
                for ip in found:
                    print(ip)
                    # Newline-terminated so addresses do not run together.
                    out.write(ip + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailihtp()
    
    def dailihttps(pages=99):
        """Scrape proxy IPs from the xicidaili ``/wt/`` listing pages.

        The decoded HTML of each page is scanned for dotted-quad IPv4
        addresses. Each address is printed and appended, one per line, to
        ``采集到的IP.txt`` in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬代理IP,默认为99页')
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The digit class and the dot separators must be escaped; without the
        # backslashes the pattern looks for a literal "d" and any character.
        octet = r'(25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
        ip_pattern = re.compile(r'\.'.join([octet] * 4))
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/wt/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # A str pattern cannot be applied to the bytes in r.content, so
            # search the decoded text instead.
            found = [".".join(octets) for octets in ip_pattern.findall(r.text)]
            if not found:
                continue
            with open('采集到的IP.txt', 'a', encoding='utf-8') as out:
                for ip in found:
                    print(ip)
                    # Newline-terminated so addresses do not run together.
                    out.write(ip + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailihttps()
    

     端口代码

    import requests
    import re
    from bs4 import BeautifulSoup
    
    
    def daili(pages=99):
        """Scrape proxy port numbers from the xicidaili ``/nt/`` listing pages.

        A ``<td>`` cell counts as a port when its stripped text is a decimal
        number in the range 0-65535 without leading zeros. Each port is printed
        and appended, one per line, to ``采集到的端口.txt`` in the current
        working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP端口,默认为99页')
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
        # get_text() has already removed the <td> tags, so the cell text itself
        # is matched; the numeric range is checked with int() because a
        # digit-by-digit pattern for 60000-65535 is easy to get wrong.
        port_pattern = re.compile(r'0|[1-9][0-9]{0,4}')
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/nt/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            soup = BeautifulSoup(r.content, 'html.parser')
            ports = []
            for cell in soup.find_all('td'):
                text = cell.get_text().strip()
                if port_pattern.fullmatch(text) and int(text) <= 65535:
                    ports.append(text)
            if not ports:
                continue
            # Same output file that dailihttps() uses; the doubled ".txt.txt"
            # extension split the results across two files.
            with open('采集到的端口.txt', 'a', encoding='utf-8') as out:
                for port in ports:
                    # Written as-is: joining the digits with "." would corrupt it.
                    print(port)
                    out.write(port + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        daili()
    
    
    def dailigaoni(pages=99):
        """Scrape proxy port numbers from the xicidaili ``/nn/`` listing pages.

        The decoded HTML of each page is searched for ``<td>N</td>`` cells whose
        content is a decimal number in the range 0-65535 without leading zeros.
        Each port is printed and appended, one per line, to ``采集到的端口.txt``
        in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP的端口,默认为99页')
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The numeric range is checked with int() below because a
        # digit-by-digit pattern for 60000-65535 is easy to get wrong.
        port_cell = re.compile(r'<td>(0|[1-9][0-9]{0,4})</td>')
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/nn/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # Search the decoded text rather than the repr of the raw bytes.
            ports = [p for p in port_cell.findall(r.text) if int(p) <= 65535]
            if not ports:
                continue
            # Same output file that dailihttps() uses; the doubled ".txt.txt"
            # extension split the results across two files.
            with open('采集到的端口.txt', 'a', encoding='utf-8') as out:
                for port in ports:
                    # Written as-is: joining the digits with "." would corrupt it.
                    print(port)
                    out.write(port + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailigaoni()
    
    
    def dailihtp(pages=99):
        """Scrape proxy port numbers from the xicidaili ``/wn/`` listing pages.

        The decoded HTML of each page is searched for ``<td>N</td>`` cells whose
        content is a decimal number in the range 0-65535 without leading zeros.
        Each port is printed and appended, one per line, to ``采集到的端口.txt``
        in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬取代理IP,默认为99页')
        # requests expects a mapping for `headers`; the original passed the
        # dict wrapped in quotes, i.e. a plain string.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The numeric range is checked with int() below because a
        # digit-by-digit pattern for 60000-65535 is easy to get wrong.
        port_cell = re.compile(r'<td>(0|[1-9][0-9]{0,4})</td>')
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/wn/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # A str pattern cannot be applied to the bytes in r.content, so
            # search the decoded text instead.
            ports = [p for p in port_cell.findall(r.text) if int(p) <= 65535]
            if not ports:
                continue
            # Same output file that dailihttps() uses; the doubled ".txt.txt"
            # extension split the results across two files.
            with open('采集到的端口.txt', 'a', encoding='utf-8') as out:
                for port in ports:
                    # Written as-is and newline-terminated: joining the digits
                    # with "." would corrupt the number.
                    print(port)
                    out.write(port + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailihtp()
    
    
    def dailihttps(pages=99):
        """Scrape proxy port numbers from the xicidaili ``/wt/`` listing pages.

        The decoded HTML of each page is searched for ``<td>N</td>`` cells whose
        content is a decimal number in the range 0-65535 without leading zeros.
        Each port is printed and appended, one per line, to ``采集到的端口.txt``
        in the current working directory.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched
                (connection failure or the 10 second timeout).
        """
        print('[+]极速爬代理IP的端口,默认为99页')
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        # The numeric range is checked with int() below because a
        # digit-by-digit pattern for 60000-65535 is easy to get wrong.
        port_cell = re.compile(r'<td>(0|[1-9][0-9]{0,4})</td>')
        # range() excludes its upper bound, so add one to really cover `pages`.
        for page in range(1, pages + 1):
            url = "http://www.xicidaili.com/wt/{}".format(page)
            r = requests.get(url, headers=header, timeout=10)
            # A str pattern cannot be applied to the bytes in r.content, so
            # search the decoded text instead.
            ports = [p for p in port_cell.findall(r.text) if int(p) <= 65535]
            if not ports:
                continue
            with open('采集到的端口.txt', 'a', encoding='utf-8') as out:
                for port in ports:
                    # Written as-is and newline-terminated: joining the digits
                    # with "." would corrupt the number.
                    print(port)
                    out.write(port + '\n')


    # Scrape only when executed as a script, so that importing this module to
    # call a single function does not trigger a full crawl.
    if __name__ == "__main__":
        dailihttps()
    

      调用代码

    # Start-up banner: an ASCII-art figure followed by a Chinese good-luck line.
    # NOTE(review): the art looks like it lost some backslashes when the page
    # was extracted; the literal is left untouched here.
    print('''
                       _ooOoo_ 
                      o8888888o 
                      88" . "88 
                      (| -_- |) 
                      O  =  /O 
                   ____/`---'\____ 
                 .'  \|     |//  `. 
                /  \|||  :  |||//   
               /  _||||| -:- |||||-   
               |   | \  -  /// |   | 
               | \_|  ''---/''  |   | 
                 .-\__  `-`  ___/-. / 
             ___`. .'  /--.--  `. . __ 
          ."" '<  `.___\_<|>_/___.'  >'"". 
         | | :  `- \`.;` _ /`;.`/ - ` : | | 
            `-.   \_ __ /__ _/   .-` /  / 
    ======`-.____`-.___\_____/___.-`____.-'====== 
                       `=---=' 
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
                佛祖保佑       永无BUG 
                ''')
    
    # Warning line plus the four numbered menu entries.
    print('[!]爬虫速度过快,导致IP被封请更换IP')
    print('[*]极速爬取代理IP')
    print('1.普通代理IP')
    print('2.高匿代理IP')
    print('3.http代理IP')
    print('4.https代理IP')
    # The raw answer is kept in the module-level name `bk`, which xs() reads.
    bk=input('请选择:')
    def xs():
        """Run the IP and port scrapers for the menu option stored in ``bk``.

        ``bk`` is read from module scope. Options ``'1'`` to ``'4'`` run the
        matching IP scraper, then the matching port scraper, and then exit the
        program. ``'q'`` exits immediately. Any other value prints a
        "no such option" message and returns.

        Raises:
            SystemExit: After a valid option has been handled, or on ``'q'``.
        """
        scraper_names = {
            '1': 'daili',
            '2': 'dailigaoni',
            '3': 'dailihtp',
            '4': 'dailihttps',
        }
        if bk == 'q':
            # SystemExit is what exit() raises, without relying on the helper
            # that the site module injects for interactive sessions.
            raise SystemExit
        if bk not in scraper_names:
            print('[-]没有找到你要的选项')
            return
        # Imported only once a valid option is known, so quitting or mistyping
        # does not load the scraper modules.
        import 代理.daili
        import 代理.dauk
        name = scraper_names[bk]
        # NOTE(review): the lookup path 代理.daili.daili is kept exactly as the
        # original wrote it - confirm it matches the real package layout.
        getattr(代理.daili.daili, name)()
        getattr(代理.dauk, name)()
        raise SystemExit
    # Dispatch on the option read into `bk` by the input() call above.
    xs()
    

     

    2018-02-17

  • 相关阅读:
    手写Promise——基于es6的Promise实现(含详细注释)
    手写promise
    package.json里面配置的啥
    package.json配置详解
    package.json的所有配置项及其用法,你都熟悉么
    sass语法进阶小结
    [转]利用vue-cli3快速搭建vue项目详细过程
    vue的接口封装和状态管理
    Vue项目封装请求数据的接口总结
    JSDoc入门使用指南 -- 手摸手教你用JSDoc(超好用的js文档生成工具)
  • 原文地址:https://www.cnblogs.com/haq5201314/p/8451683.html
Copyright © 2011-2022 走看看