zoukankan      html  css  js  c++  java
  • 抓取代理IP，然后保存成txt

    #!/usr/bin/env python
    # coding=utf-8
    #针对 www.xicidaili.com
    import re
    import random
    import sys
    import time
    import datetime
    import threading
    from random import choice
    import requests
    import bs4
    import string


    # Shared output file, opened in append mode as soon as the module loads;
    # get_ip() writes to it and main() closes it.
    file=open('data.txt','a')  
    def get_ip(str1):
        """Scrape one listing page of www.xicidaili.com and save its proxies.

        Fetches ``http://www.xicidaili.com/wt/<str1>``, extracts ``ip:port``
        pairs from the ``<td>`` cells of the first table on the page, prints
        the resulting list, and appends each pair followed by a single space
        to the module-level ``file`` object.

        Args:
            str1: Page number as a string; appended verbatim to the URL.

        Returns:
            None. A page without any ``<table>`` is skipped silently.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails
                or exceeds the timeout.
        """
        url = "http://www.xicidaili.com/wt/" + str1
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;",
            "Accept-Encoding": "gzip, deflate, sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6",
            "Referer": "http://www.xicidaili.com",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
        }
        # A timeout keeps one stalled connection from hanging the whole crawl.
        r = requests.get(url, headers=headers, timeout=10)
        soup = bs4.BeautifulSoup(r.text, 'html.parser')
        table = soup.table
        if table is None:
            # No table on the page (e.g. an error page): nothing to extract.
            return
        cells = str(table.find_all("td"))
        # The backslashes matter: without them the patterns look for the
        # literal letter "d" and never match an address or a port.
        ip_compile = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td>')   # IPv4 address cell
        port_compile = re.compile(r'<td>(\d+)</td>')                 # all-digit cell (port)
        ip = ip_compile.findall(cells)
        port = port_compile.findall(cells)
        # zip pairs the n-th address with the n-th port and stops at the
        # shorter of the two lists.
        proxies = [":".join(pair) for pair in zip(ip, port)]
        print(proxies)
        for proxy in proxies:
            file.write(proxy + ' ')

    def main(page_limit=2000, delay=0.1):
        """Crawl the listing pages in order, then close the output file.

        Calls ``get_ip`` for page numbers 1 up to, but not including,
        ``page_limit``, sleeping ``delay`` seconds after each page. The
        module-level ``file`` is closed when the loop finishes and also when
        a page raises, so the file handle is never left open.

        Args:
            page_limit: Exclusive upper bound of the page numbers to fetch.
            delay: Seconds to wait after each page request.
        """
        try:
            for page in range(1, page_limit):
                get_ip(str(page))
                time.sleep(delay)
        finally:
            file.close()
    # Start the crawl only when this file is executed as a script.
    if __name__ == '__main__':
        main()

  • 相关阅读:
    some requirement checks failed
    FTP下载文件时拒绝登陆申请怎么办?
    Linux查看与设定别名
    如何编写shell脚本
    Linux shell是什么
    Linux命令大全之查看登陆用户信息
    Linux命令大全之挂载命令
    论第二次作业之输入输出格式怎么合格(才疏学浅说的不对轻点喷我)
    文件词数统计
    软件工程作业--第一周
  • 原文地址:https://www.cnblogs.com/wj2ge/p/7009849.html
Copyright © 2011-2022 走看看