zoukankan html css js c++ java

骑行入门

为每一位骑行新手推荐入门文章，爬虫代码如下：

import re
import requests
from bs4 import BeautifulSoup
import csv

def paqu():
    """Scrape the biketo.com beginner-guide index and append articles to a CSV.

    Fetches the index page, takes a fixed slice of its ``<li>`` elements,
    follows the first link inside each one, and collects the text of every
    ``<p style="text-indent:2em;">`` paragraph on the linked page. One CSV row
    is written per article: title, link, then one column per paragraph.

    The output file is opened in append mode, so repeated runs accumulate
    rows. The header row is written only when the file is still empty.

    Raises:
        requests.RequestException: if the index page or an article page
            cannot be fetched (propagated unchanged from ``requests.get``).
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
    }
    link = 'http://www.biketo.com/beginnerguide/'
    csv_path = '../../Documents/Tencent Files/631836111/FileRecv/骑行入门.csv'

    # The context manager guarantees the file is flushed and closed even if a
    # request fails part-way through the crawl.
    with open(csv_path, 'a+', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)

        # Only emit the header for a fresh file; otherwise every run would
        # append a duplicate header row in the middle of the data.
        f.seek(0, 2)
        if f.tell() == 0:
            csv_writer.writerow(["题目", "内容链接", "详细内容"])

        r = requests.get(link, headers=headers, timeout=10)
        soup = BeautifulSoup(r.text, "lxml")

        # NOTE(review): the 4..140 slice presumably skips navigation items and
        # trailing page furniture on the index page -- confirm against the
        # current page layout.
        li_list = soup.find_all('li')[4:140]

        for each in li_list:
            anchor = each.find('a')
            if anchor is None:
                # <li> without a link: nothing to follow.
                continue
            href = anchor.get('href')
            if not href:
                continue

            # ``.string`` is None when the anchor wraps nested tags; fall back
            # to the concatenated text so the title is never None.
            txt = anchor.string
            if txt is None:
                txt = anchor.get_text(strip=True)
            print(txt + "      " + href)

            article_response = requests.get(href, headers=headers, timeout=10)
            article_soup = BeautifulSoup(article_response.text, "lxml")
            paragraphs = article_soup.find_all('p', style='text-indent:2em;')

            # Paragraphs containing nested markup have ``.string`` of None and
            # are skipped, as before.
            txt1 = [p.string for p in paragraphs if p.string is not None]
            csv_writer.writerow([txt, href, *txt1])

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    paqu()

获得的数据：

是多疑还是去相信谎言背后的忠心或许是自己太执迷命题游戏沿着他的脚步呼吸开始变得急促就算看清了面目设下埋伏真相却居无定处 I swear I'll never be with the devil 用尽一生孤独没有退路的路你看不到我眉眼焦灼却不明下落命运的轮轴伺机而动来不及闪躲沿着他的脚步呼吸开始变得急促就算看清了面目设下埋伏真相却居无定处 I swear I'll never be with the devil 用尽一生孤独没有退路的路你看不到我眉眼焦灼却不明下落命运的轮轴伺机而动来不及闪躲你看不到我眉眼焦灼却不明下落命运的轮轴伺机而动来不及闪躲黑夜和白昼你争我夺真相被蛊惑心从不退缩这天堂荒漠留给孤独的猎手

查看全文

相关阅读:
web开发之web 验证码--- webqq 机器人
 赛星软件---智能视频分析事件检测
 模式识别之双目立体视觉---双目立体视觉相关介绍
 安卓项目之微信公众好---初体验
 《数学分析》视频
 单目和双目模式识别---游戏控制
 多媒体开发之音频编码---ffmpeg 编码aac
PC如何访问手机网址
 linux shell实现守护进程看门狗脚本
 grep用法详解:grep与正则表达式

原文地址：https://www.cnblogs.com/muailiulan/p/14534388.html