zoukankan      html  css  js  c++  java
  • python

    # coding=utf-8
    import urllib
    import sys
    import urllib
    import json
    import socket
    import time
    # Make the locally unpacked BeautifulSoup and the site-packages directory
    # importable. Raw strings keep the Windows backslashes literal: in a normal
    # string literal the "\b" in "\beautifulsoup4" is the backspace escape and
    # silently corrupts the path.
    sys.path.append(r'E:\software\python\beautifulsoup4-4.5.3')
    sys.path.append(r'E:\software\python\Lib\site-packages')
    from bs4 import BeautifulSoup

    import re
    import urllib2
    import MySQLdb

    # Download one page of the qiushibaike.com "hot" listing and extract post
    # fields from the raw HTML with a single regular expression.
    page = 1
    url = 'http://www.qiushibaike.com/hot/page/' + str(page)
    # A browser-like User-Agent is sent with the request.
    # NOTE(review): presumably the site rejects the default urllib2 agent - confirm.
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}

    # Five capture groups per match, in order:
    #   1. text inside <h2>...</h2>
    #   2. text following 'er womenIcon">' up to the next </div>
    #   3. body of the class="content" element
    #   4. body of the class="thumb" element
    #   5. body of the class="stats" element
    # NOTE(review): the literal 'womenIcon' means posts without that marker
    # are not matched - confirm this restriction is intended.
    pattern = re.compile('<div .*?<h2>(.*?)</h2>.*?er womenIcon">(.*?)'+
                         '</div>.*?class="content">(.*?)</div></a>'+
                         '.*?ass="thumb">(.*?)</div><div class="stats">(.*?)'+
                         '</div>',re.S)

    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            html = response.read().decode('utf-8')
        finally:
            # Always release the connection, even if read/decode fails.
            response.close()
    except urllib2.URLError as e:
        # Print whichever of the two attributes the error object carries.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    else:
        items = re.findall(pattern, html)

        print(type(items))
        print(items)
        # Guard against an empty result (e.g. the page markup no longer
        # matches the pattern) instead of failing with IndexError.
        if items:
            print(items[0][0].encode('utf-8'))

  • 相关阅读:
    0317复利计算的回顾与总结
    0518 Scrum 项目 5.0
    0517 Scrum 项目4.0
    0512 Scrum 项目3.0
    实验三 进程调度模拟程序
    0505 Scrum 项目1.0
    0502团队项目 SCRUM团队成立
    0428 团队项目2.0
    0422团队项目
    实验二 作业调度模拟程序
  • 原文地址:https://www.cnblogs.com/kongxc/p/7787941.html
Copyright © 2011-2022 走看看