zoukankan      html  css  js  c++  java
  • python3 cms识别类

    参考了其他人的思路,总结了下主要搜集有五个方面:

    1、index.php源代码中特征内容判断

    2、静态目录中的静态资源文件,如图片等

    3、根目录robots.txt 文本中的特征判断

    4、根目录favicon.ico MD5的特征判断

    5、http响应包中的报文特征内容判断

    参考langzi的代码

    # coding=utf-8
    
    import hashlib
    import json
    import os
    from threading import Thread

    import requests
    
    
    
    class CmsScan:
        """Fingerprint the CMS behind a web site using several independent probes.

        Each ``*_scan`` method runs one detection strategy against ``dest_url``
        and prints the detected CMS name on the first match. The scan methods
        never modify instance state, so they can be run from separate threads.

        Fingerprint data is loaded once, in ``__init__``, from files stored in
        the same directory as this module:

        * ``body.txt``     -> ``body_content``: mapping of page-body keyword to CMS name
        * ``head.txt``     -> ``head_content``: mapping of header keyword to CMS name
        * ``robots.txt``   -> ``robots_content``: iterable of robots.txt keywords
        * ``cms_rule.txt`` -> ``rule_content``: iterable of ``"path|cms|md5"`` strings
        * ``data.json``    -> ``data_content``: list of dicts with the keys
          ``url``, ``name``, ``md5`` and ``re``
        """

        def __init__(self, dest_url):
            """Store the target URL and load every fingerprint database.

            Args:
                dest_url: Base URL of the site to probe, with or without a
                    trailing slash.

            Raises:
                OSError: If one of the fingerprint files cannot be opened.
            """
            self.headers = {
                'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"
            }

            self.dest_url = dest_url

            # Resolve the data files relative to this module's directory rather
            # than slicing a fixed number of characters off ``__file__``, which
            # only worked when the script name was exactly six characters long.
            data_dir = os.path.dirname(os.path.abspath(__file__))

            self.body_content = self._load_rules(os.path.join(data_dir, 'body.txt'))
            self.head_content = self._load_rules(os.path.join(data_dir, 'head.txt'))
            self.robots_content = self._load_rules(os.path.join(data_dir, 'robots.txt'))
            self.rule_content = self._load_rules(os.path.join(data_dir, 'cms_rule.txt'))
            with open(os.path.join(data_dir, 'data.json'), 'r', encoding='utf-8') as fh:
                self.data_content = json.load(fh)

        @staticmethod
        def _load_rules(path):
            """Read ``path`` (UTF-8) and evaluate its contents as a Python expression."""
            with open(path, 'r', encoding='utf-8') as fh:
                # SECURITY NOTE(review): eval() executes whatever the file
                # contains. Only ship trusted rule files; if they hold plain
                # literals, ast.literal_eval() would be a safer replacement.
                return eval(fh.read())

        def _base_url(self):
            """Return ``dest_url`` without one trailing slash; never mutates state."""
            url = self.dest_url
            return url[:-1] if url.endswith('/') else url

        def _fetch(self, url):
            """GET ``url`` with the scanner's settings; return ``None`` on a request error."""
            try:
                return requests.get(url=url, headers=self.headers, allow_redirects=False, timeout=3, verify=False)
            except requests.RequestException:
                # Best effort: an unreachable resource simply means "no match".
                return None

        @staticmethod
        def _body_text(resp):
            """Decode a response body as UTF-8, replacing undecodable bytes."""
            # Pages in other encodings must not abort the scan; invalid bytes
            # are replaced so keyword matching can still run.
            return resp.content.decode('utf-8', errors='replace')

        @staticmethod
        def _md5_hex(data):
            """Return the hexadecimal MD5 digest of ``data`` (bytes)."""
            return hashlib.md5(data).hexdigest()

        def header_index_content_scan(self):
            """Detect the CMS from the landing page body, then from its headers.

            Requests ``dest_url`` exactly as given. Prints the CMS name for the
            first body keyword found in the page; if none matches, prints the
            CMS name for the first header keyword found in the response headers.
            """
            resp = self._fetch(self.dest_url)
            if resp is None:
                return

            # Decode once instead of once per keyword.
            page = self._body_text(resp)
            for keyword, cms in self.body_content.items():
                if keyword in page:
                    print("[header_index_content_scan]: cms识别为 " + cms)
                    return

            # NOTE(review): ``in resp.headers`` tests header *names* only;
            # confirm the head.txt fingerprints are not meant to match values.
            for keyword, cms in self.head_content.items():
                if keyword in resp.headers:
                    print("[scan01_headers]: cms识别为 " + cms)
                    return

        def robot_scan(self):
            """Detect the CMS from keywords found in ``/robots.txt``."""
            resp = self._fetch(self._base_url() + '/robots.txt')
            if resp is None:
                return

            text = self._body_text(resp)
            for robots in self.robots_content:
                if robots in text:
                    # NOTE(review): this prints the matched keyword itself;
                    # verify whether a CMS name lookup was intended.
                    print("[robots_scan]: cms识别为 " + robots)
                    return

        def sub_dir_content_scan(self):
            """Detect the CMS from characteristic files under known sub-paths.

            For each rule in ``data_content`` the rule's ``url`` is fetched once.
            Only a 200 response is examined: when the rule's ``md5`` is empty
            the rule's ``re`` text must occur in the body, otherwise the body's
            MD5 must equal the rule's ``md5``. The first matching rule's
            ``name`` is printed.
            """
            base = self._base_url()

            for sub_dir in self.data_content:
                try:
                    resp = self._fetch(base + sub_dir['url'])
                    if resp is None or resp.status_code != 200:
                        continue

                    if sub_dir['md5'] == '':
                        # No hash available: fall back to a substring check.
                        matched = sub_dir['re'] in self._body_text(resp)
                    else:
                        matched = self._md5_hex(resp.content) == sub_dir['md5']

                    if matched:
                        print(sub_dir['name'])
                        return
                except (KeyError, TypeError):
                    # Malformed rule entry: skip it instead of ending the scan.
                    continue

        def md5_scan(self):
            """Detect the CMS by the MD5 of well-known static files (e.g. favicon.ico).

            Every entry of ``rule_content`` has the form ``"path|cms|md5"``.
            The path is fetched and the first entry whose body hash equals the
            expected MD5 has its CMS name printed.
            """
            base = self._base_url()

            for rule in self.rule_content:
                fields = rule.split('|')
                if len(fields) < 3:
                    # Malformed rule line: nothing to compare against.
                    continue
                cms_dir, cms_type, cms_md5 = fields[:3]

                resp = self._fetch(base + cms_dir)
                if resp is None:
                    continue

                if self._md5_hex(resp.content) == cms_md5:
                    print(cms_type)
                    return
    
    
    
    if __name__ == '__main__':
        # Run every detection strategy against one target, one thread per strategy.
        scanner = CmsScan('http://news.eeeqi.cn/')
        thread_list = [
            Thread(target=probe, args=())
            for probe in (
                scanner.header_index_content_scan,
                scanner.robot_scan,
                scanner.sub_dir_content_scan,
                scanner.md5_scan,
            )
        ]

        # Start all probes first, then wait for every one of them to finish.
        for worker in thread_list:
            worker.start()
        for worker in thread_list:
            worker.join()
    

  • 相关阅读:
    基于角色的权限设计(一)
    js图片懒加载插件封装
    项目中必须知道有关css和html的常识
    设为主页代码及添加到收藏夹代码大全
    JS弹出层、弹窗效果+拖曳功能
    算数验证码
    js基础知识
    基于角色的权限设计(二)
    sqlserver数据类型char和nchar,varchar和nvarchar,text和ntext的用法以及区别?
    经典页面布局,任何分辨率下,全屏显示
  • 原文地址:https://www.cnblogs.com/zpchcbd/p/12606889.html
Copyright © 2011-2022 走看看