zoukankan      html  css  js  c++  java
  • 【Python】爬取百度图片进行人脸识别

    import os,cv2,requests,json,re,time
    import tensorflow as tf
    from bs4 import BeautifulSoup
    
    def check_path(path):
        """
        Normalise a '/'-separated path by removing empty segments.

        Repeated and trailing slashes are collapsed. A leading slash is kept so
        that an absolute path stays absolute instead of silently turning into a
        relative one.

        :param path: path string to clean up
        :return: the cleaned path; a non-string argument is returned unchanged
        """
        # Only strings can be split on '/'; anything else is handed back as-is.
        if not isinstance(path, str):
            return path
        cleaned = '/'.join(part for part in path.split('/') if part)
        # Dropping empty segments also drops the root marker, so restore it.
        if path.startswith('/'):
            cleaned = '/' + cleaned
        return cleaned
    
    def decrypt_objURL(str):
        """
        Decode an obfuscated Baidu image ``objURL`` into a plain URL.

        Three multi-character tokens stand for ``:``, ``.`` and ``/``; every
        other character in ``[0-9a-w]`` is replaced through a substitution
        table. Characters outside that set are left untouched.

        :param str: the encoded image address (the name shadows the builtin but
                    is kept so keyword callers keep working)
        :return: the decoded image address, type=str
        """
        table = {'w': "a", 'k': "b", 'v': "c", '1': "d", 'j': "e", 'u': "f", '2': "g", 'i': "h",
                 't': "i", '3': "j", 'h': "k", 's': "l", '4': "m", 'g': "n", '5': "o", 'r': "p",
                 'q': "q", '6': "r", 'f': "s", 'p': "t", '7': "u", 'e': "v", 'o': "w", '8': "1",
                 'd': "2", 'n': "3", '9': "4", 'c': "5", 'm': "6", '0': "7",
                 'b': "8", 'l': "9", 'a': "0", '_z2C$q': ":", "_z&e3B": ".", 'AzdH3F': "/"}

        # The '$' in '_z2C$q' must be escaped: unescaped it is an end-of-string
        # anchor and the ':' token would never match. The multi-character tokens
        # are listed before the single-character class so they take priority,
        # which lets the whole decode run in one pass.
        pattern = r'_z2C\$q|_z&e3B|AzdH3F|[0-9a-w]'

        # Every possible match is a key of ``table``, so direct indexing is safe.
        return re.sub(pattern, lambda matched: table[matched.group(0)], str)
    
    def Request_Img(word='佟丽娅',imgNum=300):
        """
        Collect decoded image URLs from Baidu image search for a keyword.

        The search endpoint is queried page by page, with the ``pn`` offset
        advancing in steps of 30 until ``imgNum`` is reached. Each entry's
        ``objURL`` is decoded with ``decrypt_objURL``.

        :param word: search keyword
        :param imgNum: upper bound of the result offset to request
        :return: list of decoded image URLs (may be shorter than ``imgNum``)
        """
        objURL_list = []
        for page in range(0, imgNum, 30):
            try:
                # ``params`` lets requests URL-encode the keyword; the timeout
                # keeps one stalled connection from hanging the whole crawl.
                payload = requests.get(
                    url='http://image.baidu.com/search/acjson',
                    params={'tn': 'resultjson_com', 'ipn': 'rj', 'word': word, 'pn': page},
                    timeout=10,
                ).json()
            except (requests.RequestException, ValueError) as e:
                # Network failure or a body that is not valid JSON: report it
                # and carry on with the next page instead of aborting.
                print('出现异常!!!', e)
                continue

            items = payload.get('data') if isinstance(payload, dict) else None
            for img in items or []:
                # Entries without an 'objURL' are skipped one by one so they no
                # longer discard the rest of the page. (Presumably the result
                # list can end with an empty placeholder entry - TODO confirm.)
                encoded = img.get('objURL') if isinstance(img, dict) else None
                if not encoded:
                    continue
                try:
                    objURL_list.append(decrypt_objURL(encoded))
                except TypeError as e:
                    # objURL was not a string; report it and keep going.
                    print('出现异常!!!', e)

        return objURL_list
    
    def Face_Detection(urllist,savepath='./TLY'):
        """
        Download each image, detect frontal faces and save the results.

        For every detected face two files are written: a crop of the face with
        a 10-pixel margin under ``<savepath>/face/`` and a copy of the image
        with the faces found so far outlined in green under ``<savepath>/``.
        The downloaded bytes are staged in the scratch file ``./.img``.

        Processing is best-effort: a failure on one URL is printed and the next
        URL is tried.

        :param urllist: iterable of image URLs to process
        :param savepath: directory that receives the annotated images; face
                         crops go into its ``face`` sub-directory
        :return: None
        """
        if not urllist:
            return

        # Loading the cascade does not depend on the URL, so do it once.
        face_cascade = cv2.CascadeClassifier('./haarcascade_frontalface_default.xml')
        if face_cascade.empty():
            # Without a loaded model every detection would fail; say so once
            # instead of downloading every image for nothing.
            print('出现异常!!!', 'cannot load ./haarcascade_frontalface_default.xml')
            return

        face_dir = os.path.join(savepath, 'face')

        for url in urllist:
            print(url)
            try:
                content = requests.get(url=url, timeout=10).content
                with open('./.img','wb') as f:
                    f.write(content)

                img = cv2.imread('./.img')
                if img is None:
                    # The payload was not a decodable image (e.g. an HTML error page).
                    continue

                gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray,
                                                      scaleFactor=1.15,
                                                      minNeighbors=10,
                                                      minSize=(1,1))
                # ``faces`` may be an array, so test its length rather than its truthiness.
                if len(faces) == 0:
                    continue
                print(faces)

                # Creates ``savepath`` and its ``face`` child, including any missing parents.
                os.makedirs(face_dir, exist_ok=True)

                # Rectangles are drawn on a copy so later crops stay clean.
                annotated = img.copy()
                for x,y,w,h in faces:
                    name = ''.join(str(time.time()).split('.'))
                    # Clamp the margin at the image border: a negative start
                    # index would wrap around and give an empty or wrong crop.
                    top = max(y - 10, 0)
                    left = max(x - 10, 0)
                    cv2.imwrite(os.path.join(face_dir, name + '_face' + '.jpg'),
                                img[top:y + h + 10, left:x + w + 10])
                    cv2.rectangle(annotated,(x,y),(x+w,y+h),(0,255,0),2)
                    cv2.imwrite(os.path.join(savepath, name + '.jpg'), annotated)
            except Exception as e:
                # Best-effort per URL: report the failure and move on.
                # Unlike a bare ``except``, this still lets Ctrl-C stop the crawl.
                print('出现异常!!!', e)
    
    
    if __name__ == '__main__':
        # Script entry point: gather image URLs with the default keyword and
        # count, then run face detection over them with the default save path.
        image_urls = Request_Img()
        Face_Detection(image_urls)
  • 相关阅读:
    基于傅里叶变换的音频重采样算法 (附完整c代码)
    自动曝光修复算法 附完整C代码
    3D Lut 电影级调色算法 附完整C代码
    之于图片主色调提取算法
    并发中的各种锁
    算法---BitMap
    高级数据结构---堆树和堆排序
    高级数据结构---赫(哈)夫曼树及java代码实现
    域名和服务器绑定及https协议更换
    高级数据结构---B树和B+树及mysql索引分析
  • 原文地址:https://www.cnblogs.com/zxingwork/p/11421634.html
Copyright © 2011-2022 走看看