Python 爬取盗墓笔记的标题,章节,章节名称 - 走看看

zoukankan html css js c++ java

Python 爬取盗墓笔记的标题,章节,章节名称

# coding:utf-8
import requests
import json
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

headers = {'User-Agent': user_agent}


def fetch_soup(url="http://seputu.com/", timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty catalog.
    response.raise_for_status()
    # Pass the raw bytes: from_encoding is only honoured for bytes input and
    # is ignored (with a warning) when already-decoded text is supplied.
    return BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')


def build_catalog(soup):
    """Collect section titles and their chapter links from a parsed page.

    Every element with class "mulu" that contains an <h2> contributes one
    entry; the chapters are the <a> tags inside its class "box" element.

    Returns:
        A list of ``{'title': ..., 'content': [{'href': ..., 'box_title': ...}]}``
        dictionaries, in document order.
    """
    content = []
    for mulu in soup.find_all(class_="mulu"):
        h2 = mulu.find('h2')
        if h2 is None:
            continue
        h2_title = h2.string  # section title
        box = mulu.find(class_='box')
        # A section without a "box" container has no chapter links; keep the
        # title with an empty chapter list rather than raising AttributeError.
        anchors = box.find_all('a') if box is not None else []
        # URL and chapter name of every link in the section.
        chapters = [
            {'href': a.get('href'), 'box_title': a.get('title')}
            for a in anchors
        ]
        content.append({'title': h2_title, 'content': chapters})
    return content


def save_catalog(content, path='qiye.json'):
    """Write *content* to *path* as indented JSON.

    The file is opened in text mode with an explicit UTF-8 encoding because
    ``json.dump`` produces ``str`` on Python 3 and cannot write to a file
    opened in binary mode.
    """
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(content, fp=fp, indent=4)


def main():
    """Scrape the seputu.com index page and store its catalog in qiye.json."""
    save_catalog(build_catalog(fetch_soup()))


if __name__ == '__main__':
    main()

查看全文

相关阅读:
#include <boost/shared_array.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/scoped_array.hpp>
df命令
 telnet命令
 sort 命令
 苏宁大数据面试题
 hive严格模式
 k-means伪代码
 vim编辑器

原文地址：https://www.cnblogs.com/paulversion/p/8336509.html

Copyright © 2011-2022 走看看