zoukankan html css js c++ java

python3 fofa爬取类

代码实现：

# coding=utf-8

import base64
import configparser
import os

import requests
from lxml import etree

class FofaSpider:
    """Scrape FOFA web search result pages and record the hits.

    For each requested result page the spider downloads the HTML, extracts
    the host URLs and their titles with XPath, prints them, and appends
    them to ``fofa_res.txt`` next to this source file.

    The session cookie (plus ``fofa_key`` and ``fofa_email``) is read from
    ``fofa.config`` next to this source file, section ``[config]``.

    Args:
        search_keyword: FOFA query, either UTF-8 encoded ``bytes`` (what the
            command-line entry point passes) or a plain ``str``.
        page: Number of result pages to fetch, starting from page 1.

    Raises:
        configparser.NoSectionError, configparser.NoOptionError: from the
            constructor, when ``fofa.config`` is missing or incomplete.
    """

    # Search endpoint. The query string is supplied through ``params`` so
    # that requests percent-encodes every value.
    SEARCH_URL = "https://fofa.so/result"
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self, search_keyword, page=5):
        self.search_keyword = search_keyword
        self.page = page
        self.getConfig()

    @staticmethod
    def _local_path(filename):
        """Return the absolute path of *filename* in this file's directory."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    @staticmethod
    def _clean(text):
        """Strip surrounding whitespace and drop embedded CR/LF characters."""
        return text.strip().replace('\r', '').replace('\n', '')

    def _keyword_bytes(self):
        """Return the search keyword as UTF-8 bytes, whatever type was given."""
        keyword = self.search_keyword
        if isinstance(keyword, bytes):
            return keyword
        return str(keyword).encode('utf-8')

    def _build_params(self, page_no):
        """Build the query parameters for result page *page_no*."""
        raw = self._keyword_bytes()
        return {
            'q': raw.decode('utf-8'),
            'qbase64': base64.b64encode(raw).decode('ascii'),
            'page': str(page_no),
        }

    def goSpider(self):
        """Fetch pages 1..``self.page`` and append every hit to the result file.

        Each hit is printed and written as ``<url>    <title>`` on its own
        line. Stops the program (``SystemExit``) when the server answers
        with status 304.
        """
        headers = {
            "Connection": "keep-alive",
            "Cookie": "_fofapro_ars_session=" + self.cookie,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.181 Safari/537.36 '
        }
        result_path = self._local_path('fofa_res.txt')

        for page_no in range(1, self.page + 1):
            resp = requests.get(
                url=self.SEARCH_URL,
                params=self._build_params(page_no),
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )

            if resp.status_code == 304:
                print("程序停止， 可能的情况是你当前会员与要爬去的页数不相匹配!")
                raise SystemExit

            print("开始解析...")
            # Start parsing.
            with open(result_path, 'a+', encoding='utf-8') as f:
                lxml_tree = etree.HTML(resp.content.decode('utf-8'))
                # URLs of the regular entries (rendered as <a target="_blank">).
                url_list = lxml_tree.xpath('//div[@class="list_mod_t"]//a[@target="_blank"]/@href')
                # Entries without an <a> tag: their text sits in a div of class "ip-no-url".
                url_list2 = lxml_tree.xpath('//div[@class="list_mod_t"]//div[@class="ip-no-url"]//text()')
                # NOTE(review): link-less entries are appended after all linked
                # ones, so on a page mixing both kinds the zip() below may pair
                # a URL with another entry's title -- confirm against the markup.
                url_list.extend(url_list2)
                url_title = lxml_tree.xpath('//ul[@class="list_sx1"]//li[1]//text()')
                for url_res, title_res in zip(url_list, url_title):
                    url_text = self._clean(url_res)
                    title_text = self._clean(title_res)
                    f.write(url_text + "    " + title_text + '\n')
                    print(url_text + "   " + title_text)
            print("解析结束...")

    def getConfig(self):
        """Load ``fofa_key``, ``fofa_email`` and ``cookie`` from ``fofa.config``.

        The file is looked up in the directory containing this source file
        and must have a ``[config]`` section holding the three options.
        """
        conf = configparser.ConfigParser()
        # ConfigParser.read() ignores a missing file; the get() calls below
        # then raise NoSectionError.
        conf.read(self._local_path('fofa.config'))
        self.fofa_key = conf.get('config', 'fofa_key')
        self.fofa_email = conf.get('config', 'fofa_email')
        self.cookie = conf.get('config', 'cookie')


if __name__ == '__main__':
    # Command-line entry point: ask for the query and the page count, then
    # run the spider. Both answers are UTF-8 encoded straight away, and
    # int() accepts the encoded digits as-is.
    keyword_bytes = input("输入你要的Fofa搜索语法: ").encode("utf-8")
    page_bytes = input("输入你要爬取的页数(默认为5): ").encode("utf-8")

    # An empty answer falls back to the constructor's default page count.
    spider_args = (keyword_bytes, int(page_bytes)) if page_bytes else (keyword_bytes,)
    FofaSpider(*spider_args).goSpider()

效果：

查看全文

相关阅读:
NOIP1996 第三题
 vijos P1071
USACO 2.3
NOIP2006 第二题(change)
NOIP2006 第二题
 NOIP2005 第三题
 Building Block 动态规划
 砝码问题 Weight
装箱问题(Packing DP)
算法第二章上机实践报告

原文地址：https://www.cnblogs.com/zpchcbd/p/12606657.html