zoukankan      html  css  js  c++  java
  • python+bs4+urllib

    # -*- coding: utf-8 -*-
    # 
    # 
    # 
    from bs4 import BeautifulSoup
    import urllib2
    import sys
    # Python 2-only workaround: reloading sys brings back
    # sys.setdefaultencoding (hidden after interpreter start-up), which is then
    # used to make UTF-8 the implicit str<->unicode codec for the process.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    ###url = 'https://www.qidian.com/search?kw=%E7%AC%91%E5%82%B2%E6%B1%9F%E6%B9%96'
    #
    #
    #
    class main(object):
        """Small qidian.com search scraper (Python 2).

        ``search`` builds the search URL for a keyword and passes it to
        ``spider``, which downloads the result page and prints the intro
        paragraph of every book entry found on it.
        """

        # Search endpoint; the URL-quoted keyword is appended to it.
        SEARCH_URL = 'https://www.qidian.com/search?kw='
        # Seconds to wait on the server so a stalled connection cannot hang
        # the script forever.
        TIMEOUT = 10

        def __init__(self):
            pass

        def search(self, key='笑傲江湖'):
            """Search for ``key`` and print the intro of each matching book.

            Args:
                key: Search keyword, either a UTF-8 byte string or a unicode
                    string. Defaults to the keyword the script originally
                    hard-coded.

            Returns:
                The list of intro texts produced by ``spider``.
            """
            print('-' * 80)
            # urllib2.quote works on byte strings; a unicode keyword with
            # non-ASCII characters must be encoded before quoting.
            if not isinstance(key, str):
                key = key.encode('utf-8')
            url = self.SEARCH_URL + urllib2.quote(key)
            # Message is re-encoded to GBK for the console this was written for.
            print('访问的网址是--'.decode('utf-8').encode('gbk') + url)
            return self.spider(url)

        def spider(self, url):
            """Download ``url`` and print the intro text of every listed book.

            Args:
                url: Address of a search result page.

            Returns:
                A list with the intro text of each book entry, in page order.
                The list is empty when the result container is not present.

            Raises:
                urllib2.URLError: Propagated unchanged if the request fails.
            """
            # Local import: urllib2 responses are not context managers.
            from contextlib import closing

            print('-' * 80)
            print('开始访问网页'.decode('utf-8').encode('gbk'))
            print('-' * 80)
            with closing(urllib2.urlopen(url, timeout=self.TIMEOUT)) as conn:
                response = conn.read()
            obj = BeautifulSoup(response, 'html.parser')

            # find() returns None when the element is missing, so every lookup
            # is checked instead of chaining attribute access on it.
            container = obj.find('div', {'class': 'book-img-text'})
            if container is None:
                return []

            intros = []
            for item in container.find_all('li'):
                info = item.find('div', {'class': 'book-mid-info'})
                if info is None:
                    continue
                intro = info.find('p', {'class': 'intro'})
                if intro is None:
                    continue
                print(intro.text)
                intros.append(intro.text)
            return intros

        def test(self):
            """Print the URL-quoted form of the sample keyword."""
            print(urllib2.quote('笑傲江湖'))
    
    
    if __name__ == '__main__':
        # Script entry point: build the scraper and run a single search.
        main().search()
  • 相关阅读:
    APPCAN   版本控制SVN
    关于 java中的换行符
    BCompare中文版安装包
    netstat
    springboot mybatis generator
    mysql删除表的方式
    jdbc写入和读取过程
    hadoop全排序和二次排序
    mapreduce之数据倾斜
    hdfs切片的计算方式
  • 原文地址:https://www.cnblogs.com/shuangzikun/p/taotao_python_bs4_urllib.html
Copyright © 2011-2022 走看看