'''
@author:zl
@contact:
@site: https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,2.html
'''
# _*_ coding:utf-8 _*_
import json
import re
import time

import requests
import xlwt
from bs4 import BeautifulSoup
from pymongo import MongoClient

# NOTE(review): this dict is not passed to any request in this script, and its
# 'Host' entry names search.51job.com rather than the Douban host queried
# below -- confirm whether it is still needed before wiring it in.
headers = {
    'user-agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
    'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    'accept-encoding': "gzip, deflate, br",
    'accept-language': "zh-CN,zh;q=0.9",
    'cache-control': "max-age=0",
    'upgrade-insecure-requests': "1",
    'Connection': 'keep-alive',
    'Host': "search.51job.com",
}

# Endpoint queried by get_content().
TOP_LIST_URL = "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90"

# Fields copied from every record. The same tuple drives the header row and the
# column order of the sheet, so header and data cannot drift apart.
COLUMNS = (
    'rank',
    'cover_url',
    'id',
    'types',
    'regions',
    'title',
    'url',
    'release_date',
    'actor_count',
    'vote_count',
    'score',
    'actors',
)

# Seconds to wait for the server before giving up.
REQUEST_TIMEOUT = 10


# Fetch the source data
def get_content(start=0, limit=300):
    """Download one page of the chart and return the decoded JSON.

    Args:
        start: Value sent as the ``start`` query parameter.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        Whatever ``response.json()`` decodes from the response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx status code.
        ValueError: If the response body is not valid JSON.
    """
    query = {'action': '', 'start': start, 'limit': limit}
    # TLS verification is left at the requests default (enabled) and a timeout
    # is set so that an unresponsive server cannot block the script forever.
    response = requests.get(TOP_LIST_URL, params=query, timeout=REQUEST_TIMEOUT)
    # Fail with an explicit HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    return response.json()


# Extract the fields
def get(jsondata):
    """Reduce every record in *jsondata* to the fields named in COLUMNS.

    Args:
        jsondata: Iterable of mappings, one per movie.

    Returns:
        A list of dicts, each holding exactly the COLUMNS keys, in that order.

    Raises:
        KeyError: If a record lacks one of the fields.
    """
    return [{field: record[field] for field in COLUMNS} for record in jsondata]


def _cell_value(value):
    """Return *value* in a form that reads well in a single cell."""
    # Sequence values are flattened to one delimited string so the cell text is
    # readable and does not depend on how xlwt treats non-scalar values.
    if isinstance(value, (list, tuple)):
        return ', '.join(str(part) for part in value)
    return value


# Write the scraped content to Excel
def excel_write(items, sheet=None):
    """Write *items* to the sheet, one row per item, below the header row.

    Args:
        items: Iterable of dicts keyed by the COLUMNS field names.
        sheet: Worksheet to write to. When omitted, the module-level ``ws``
            created in the ``__main__`` section is used.

    Rows are numbered consecutively from 1 (row 0 holds the header) rather
    than taken from the ``rank`` field, so a rank of 0, a duplicate rank, or
    a gap in the ranks cannot overwrite the header or another row.
    """
    if sheet is None:
        sheet = ws
    for row, item in enumerate(items, start=1):
        for col, field in enumerate(COLUMNS):
            # A field missing from the item simply leaves its cell empty.
            if field in item:
                sheet.write(row, col, _cell_value(item[field]))  # row, column, data


if __name__ == '__main__':
    newTable = "test2.xls"  # output file name
    wb = xlwt.Workbook(encoding='utf-8')  # create the workbook and declare its encoding
    ws = wb.add_sheet('sheet1', cell_overwrite_ok=True)  # create the worksheet
    headData = list(COLUMNS)  # header row
    bold = xlwt.easyxf('font: bold on')
    for colnum, title in enumerate(headData):
        ws.write(0, colnum, title, bold)
    excel_write(get(get_content()), ws)
    wb.save(newTable)
# Parse JSON: compare decoding the body by hand with the response's own helper.
import json

import requests

# A timeout keeps the demo from hanging forever if the server never answers.
response = requests.get('http://httpbin.org/get', timeout=10)
res1 = json.loads(response.text)
# Too cumbersome: the body text has to be decoded by hand.
res2 = response.json()
# Gets the JSON data directly.
print(res1 == res2)  # True
'''
@author:zl
@contact:
@site: https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,2.html
'''
# _*_ coding:utf-8 _*_
import json
import re
import time

import requests
import xlwt
from bs4 import BeautifulSoup
from pymongo import MongoClient

# NOTE(review): this dict is not passed to any request in this script, and its
# 'Host' entry names search.51job.com rather than the Douban host queried
# below -- confirm whether it is still needed before wiring it in.
headers = {
    'user-agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
    'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    'accept-encoding': "gzip, deflate, br",
    'accept-language': "zh-CN,zh;q=0.9",
    'cache-control': "max-age=0",
    'upgrade-insecure-requests': "1",
    'Connection': 'keep-alive',
    'Host': "search.51job.com",
}

# Endpoint queried by get_content().
TOP_LIST_URL = "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90"

# Fields copied from every record. The same tuple drives the header row and the
# column order of the sheet, so header and data cannot drift apart.
COLUMNS = (
    'rank',
    'cover_url',
    'id',
    'types',
    'regions',
    'title',
    'url',
    'release_date',
    'actor_count',
    'vote_count',
    'score',
    'actors',
)

# Seconds to wait for the server before giving up.
REQUEST_TIMEOUT = 10


# Fetch the source data
def get_content(start=0, limit=300):
    """Download one page of the chart and return the decoded JSON.

    Args:
        start: Value sent as the ``start`` query parameter.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        Whatever ``response.json()`` decodes from the response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx status code.
        ValueError: If the response body is not valid JSON.
    """
    query = {'action': '', 'start': start, 'limit': limit}
    # TLS verification is left at the requests default (enabled) and a timeout
    # is set so that an unresponsive server cannot block the script forever.
    response = requests.get(TOP_LIST_URL, params=query, timeout=REQUEST_TIMEOUT)
    # Fail with an explicit HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    return response.json()


# Extract the fields
def get(jsondata):
    """Reduce every record in *jsondata* to the fields named in COLUMNS.

    Args:
        jsondata: Iterable of mappings, one per movie.

    Returns:
        A list of dicts, each holding exactly the COLUMNS keys, in that order.

    Raises:
        KeyError: If a record lacks one of the fields.
    """
    return [{field: record[field] for field in COLUMNS} for record in jsondata]


def _cell_value(value):
    """Return *value* in a form that reads well in a single cell."""
    # Sequence values are flattened to one delimited string so the cell text is
    # readable and does not depend on how xlwt treats non-scalar values.
    if isinstance(value, (list, tuple)):
        return ', '.join(str(part) for part in value)
    return value


# Write the scraped content to Excel
def excel_write(items, sheet=None):
    """Write *items* to the sheet, one row per item, below the header row.

    Args:
        items: Iterable of dicts keyed by the COLUMNS field names.
        sheet: Worksheet to write to. When omitted, the module-level ``ws``
            created in the ``__main__`` section is used.

    Rows are numbered consecutively from 1 (row 0 holds the header) rather
    than taken from the ``rank`` field, so a rank of 0, a duplicate rank, or
    a gap in the ranks cannot overwrite the header or another row.
    """
    if sheet is None:
        sheet = ws
    for row, item in enumerate(items, start=1):
        for col, field in enumerate(COLUMNS):
            # A field missing from the item simply leaves its cell empty.
            if field in item:
                sheet.write(row, col, _cell_value(item[field]))  # row, column, data


if __name__ == '__main__':
    newTable = "test2.xls"  # output file name
    wb = xlwt.Workbook(encoding='utf-8')  # create the workbook and declare its encoding
    ws = wb.add_sheet('sheet1', cell_overwrite_ok=True)  # create the worksheet
    headData = list(COLUMNS)  # header row
    bold = xlwt.easyxf('font: bold on')
    for colnum, title in enumerate(headData):
        ws.write(0, colnum, title, bold)
    excel_write(get(get_content()), ws)
    wb.save(newTable)