这个爬虫可以从天眼查(m.tianyancha.com)爬取指定公司的注册时间和公司性质(企业类型)。
如果你有其他需求,也可以自行添加其他功能。
源代码奉上。
# -*- coding:utf-8 -*-
# Tianyancha crawler.
#
# For every company name in ``list1`` this script queries the mobile search
# page of m.tianyancha.com, follows the first search hit to its detail page
# and prints: company name, registration time, company type.
import os
import random
import re  # unused in this script; kept in case other code relies on it
import sys
import time
from html.parser import HTMLParser  # unused in this script; kept as above
from urllib.parse import quote

import requests
from lxml import etree

# Seconds to wait for a connection/response before giving up on a request.
REQUEST_TIMEOUT = 10
# Pause (seconds) before each company lookup, to keep the request rate low.
REQUEST_DELAY = 2
# Mobile search endpoint; ``%s`` receives the URL-quoted company name.
SEARCH_URL = "https://m.tianyancha.com/search?key=%s"

# Pool of proxy servers; one is picked at random for every request.
proxy = [
    'http://112.83.86.88:2589',
    'https://117.92.128.239:2444',
    'https://117.94.120.55:4734',
    'https://116.149.201.121:6436',
    'https://111.72.104.133:4184',
    'https://113.103.151.180:4217',
    'https://60.189.139.208:4241',
    'https://222.191.171.98:4263',
    'https://182.108.168.108:4234',
    'https://115.209.194.193:4270',
]

# Pool of User-Agent strings; one is picked at random per run.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

# Logged-in session cookies sent with every request.
# SECURITY: this is a captured browser session (it embeds an auth token and a
# phone number); do not publish real sessions in source code.
# NOTE(review): the timestamps inside it date from December 2018, so it has
# presumably expired - supply a fresh cookie through the TIANYANCHA_COOKIE
# environment variable instead of editing this list.
cookie = [
    (
        'aliyungf_tc=AQAAAMwIX1VNRgAAUhVFeRNLGKWzKDfZ; '
        'csrfToken=X48pVLAHE61Kepl8V8_utX4N; '
        'TYCID=34e5cdc0faf411e88d5a635382706bce; '
        'undefined=34e5cdc0faf411e88d5a635382706bce; '
        'ssuid=1308960292; '
        'Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544278750; '
        '_ga=GA1.2.1258357000.1544278750; '
        '_gid=GA1.2.243951796.1544278750; '
        'RTYCID=f1acc84a6142405ebb406208812ca57e; '
        'CT_TYCID=12bfe0aaa4854b3fbbc3b1e6a8e1772f; '
        'cloud_token=f67c730341da4819ad72051ebd9aa270; '
        'bannerFlag=true; '
        'token=5092657e644e48b6a5e82360b799e518; '
        '_utm=21899f8b8a05449e8975c8bec22037af; '
        'tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25220%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYwMTE2NjUwNCIsImlhdCI6MTU0NDM0MDkyNiwiZXhwIjoxNTU5ODkyOTI2fQ.5GS2SnPon-4fmd-yIYtQ9Yr0ZWMqHvrXr_ks3oRmeHm-rgPjcP63yMeuPrPb1axXpg8syEkiyzwBxUc03TpeYg%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252216601166504%2522%257D; '
        'auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYwMTE2NjUwNCIsImlhdCI6MTU0NDM0MDkyNiwiZXhwIjoxNTU5ODkyOTI2fQ.5GS2SnPon-4fmd-yIYtQ9Yr0ZWMqHvrXr_ks3oRmeHm-rgPjcP63yMeuPrPb1axXpg8syEkiyzwBxUc03TpeYg; '
        'Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544340930; '
        '_gat_gtag_UA_123487620_1=1'
    ),
]

# An externally supplied cookie wins over the hard-coded pool.
cookie1 = os.environ.get("TIANYANCHA_COOKIE") or random.choice(cookie)
dd = random.choice(USER_AGENTS)

# Request headers shared by every call to down_load().
headers = {
    "Referer": "https://www.baidu.com/",
    "Cookie": cookie1,
    "User-Agent": dd,
}

# Proxies that already failed during this run; they are not tried again.
_failed_proxies = set()


def _get(url, proxies=None):
    """Fetch *url* with the shared headers and return the body as UTF-8 text."""
    response = requests.get(
        url=url,
        headers=headers,
        proxies=proxies,
        timeout=REQUEST_TIMEOUT,
    )
    # The pages are decoded as UTF-8 regardless of what the server announces.
    response.encoding = "utf-8"
    return response.text


def down_load(url):
    """Download *url* and return the response body decoded as UTF-8 text.

    A random proxy from ``proxy`` is tried first. The proxy is registered for
    both the ``http`` and ``https`` schemes: with only an ``http`` entry the
    proxy would never be applied, because every URL fetched here is https.
    If the attempt through the proxy fails with a connection error or a
    timeout, the proxy is remembered as failed and the request is repeated
    once without a proxy.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The response body as ``str``. The HTTP status code is not checked.

    Raises:
        requests.exceptions.RequestException: if the direct (proxy-less)
            request fails as well.
    """
    usable = [candidate for candidate in proxy if candidate not in _failed_proxies]
    if usable:
        chosen = random.choice(usable)
        try:
            return _get(url, {"http": chosen, "https": chosen})
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            _failed_proxies.add(chosen)
    return _get(url)


# Company names to look up.
list1 = [
    "中科润蓝环保技术(北京)有限公司",
    "中联先进钢铁材料技术有限责任公司",
    "中铝国际工程设备有限公司",
    "中铝润滑科技有限公司",
    "中石化催化剂(北京)有限公司",
    "中石化三菱化学聚碳酸酯(北京)有限公司",
    "中天开元(北京)建筑装饰工程有限公司",
    "中铁丰桥桥梁有限公司",
    "中铁物总轨道装备贸易有限公司",
    "中新高科工程技术(北京)有限公司",
    "中新联进出口公司",
    "中信国安盟固利电源技术有限公司",
]


def _parse(html_text):
    """Parse *html_text* with lxml; return ``None`` when there is nothing to parse."""
    if not html_text or not html_text.strip():
        return None
    return etree.HTML(html_text)


def fetch_company_info(name):
    """Look up one company and return ``(registered_time, company_type)``.

    Args:
        name: Company name used as the search keyword.

    Returns:
        A 2-tuple of strings, or ``None`` when the search has no hit or an
        expected element is missing from either page.
    """
    # Quote the name so characters such as "&" or "#" cannot break the query.
    search_tree = _parse(down_load(SEARCH_URL % quote(name, safe="")))
    if search_tree is None:
        return None

    detail_urls = search_tree.xpath('//div[contains(@class,"col-xs-10")]/a/@href')
    # NOTE: further fields sit next to the registration time on the search page,
    # e.g. '//div[@class="search_row_new_mobil"]//a/text()' and
    # '//div[@class="search_row_new_mobil"]/div/div[2]/span/text()'.
    registered_times = search_tree.xpath(
        '//div[@class="search_row_new_mobil"]/div/div[3]/span/text()'
    )
    if not detail_urls or not registered_times:
        return None

    # Only the first search hit is followed.
    detail_tree = _parse(down_load(detail_urls[0]))
    if detail_tree is None:
        return None

    type_texts = detail_tree.xpath('//div[@class="item-line"][6]/span/text()')
    # The company type is the second text node of that row.
    if len(type_texts) < 2:
        return None

    return registered_times[0], type_texts[1]


def main():
    """Crawl every company in ``list1`` and print one result line per company."""
    for company in list1:
        time.sleep(REQUEST_DELAY)
        try:
            info = fetch_company_info(company)
        except requests.exceptions.RequestException as exc:
            print("%s: request failed (%s)" % (company, exc), file=sys.stderr)
            continue
        if info is None:
            print("%s: no usable result, skipped" % company, file=sys.stderr)
            continue
        the_registered_time, company_type = info
        print(company, the_registered_time, company_type)


if __name__ == "__main__":
    main()
爬取结果: