百度贴吧爬虫小程序源码

# -*- coding: utf-8 -*-
import urllib.request
import urllib.parse
import os
# Base endpoint of a Tieba forum listing; the query string is appended per page.
url = 'http://tieba.baidu.com/f?'

# Directory under which one sub-folder per forum is created.
# NOTE(review): the original literal was r'E:python' with no separators, which
# looks like a mangled r'E:\python' -- confirm the intended location.
SAVE_ROOT = r'E:\python'

# Number of threads per listing page; the `pn` query parameter is a thread
# offset, not a page index (the original computed (page-1)*50 but never used it).
PAGE_SIZE = 50

# Browser-like User-Agent sent with every request; built once, not per page.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}


def build_page_url(name, page):
    """Return the listing URL for 1-based page *page* of forum *name*.

    The name is passed to ``urlencode`` unquoted: ``urlencode`` already
    percent-encodes its values, so quoting beforehand would double-encode
    non-ASCII names. A fresh URL is built on every call instead of
    appending to the shared base URL.
    """
    query = urllib.parse.urlencode({
        'kw': name,
        'ie': 'utf-8',
        'pn': (page - 1) * PAGE_SIZE,
    })
    return url + query


def download_pages(name, start, end, save_root=SAVE_ROOT):
    """Download pages *start*..*end* (both inclusive) of forum *name*.

    Each page is written as ``<name>_<page>.html`` inside
    ``<save_root>/<name>``, which is created if it does not exist yet.
    Network and file-system errors propagate to the caller.
    """
    target_dir = os.path.join(save_root, name)
    # exist_ok avoids failing on re-runs; the directory path is always
    # defined, whether or not it existed beforehand.
    os.makedirs(target_dir, exist_ok=True)

    for page in range(start, end + 1):
        request = urllib.request.Request(
            url=build_page_url(name, page), headers=HEADERS)
        filepath = os.path.join(
            target_dir, name + '_' + str(page) + ".html")
        # Close the HTTP response as well as the output file.
        with urllib.request.urlopen(request) as response, \
                open(filepath, 'wb') as fp:
            fp.write(response.read())
        print("下载完成第{n}页".format(n=page))


def main():
    """Prompt for the page range and forum name, then download the pages."""
    start = int(input("请输入开始页码："))
    end = int(input("请输入结束页码："))
    name = input("请输入搜索贴吧的名字:")
    download_pages(name, start, end)


if __name__ == '__main__':
    main()

查看全文

相关阅读:
【学习总结】测试开发工程师面试指南-软件测试行业分析与职业解析
 【学习总结】测试开发工程师面试指南-汇总
 【JAVA】java中char类型数组用数组名打印结果不是地址值而是数组内容
 Python常见问题合集
 操作系统常见问题合集
 算法题常见问题合集
 个人向常见问题合集
 Linux常见问题合集
 数据结构常见问题合集
 网络常见问题合集

原文地址：https://www.cnblogs.com/ybl20000418/p/11609596.html