zoukankan      html  css  js  c++  java
  • python

    # coding = utf-8
    import urllib
    import sys
    import urllib
    import json
    import socket
    import time
    # Make the locally unpacked BeautifulSoup and site-packages directories
    # importable. Raw strings are required: in a normal literal the "\b" of
    # "\beautifulsoup4" is a backspace escape and silently corrupts the path.
    sys.path.append(r'E:\software\python\beautifulsoup4-4.5.3')
    sys.path.append(r'E:\software\python\Lib\site-packages')
    from bs4 import BeautifulSoup

    import re
    import urllib2
    import MySQLdb

    # Download one page of the qiushibaike.com "hot" listing and extract
    # post fields from the raw HTML with a regular expression.
    page = 1
    url = 'http://www.qiushibaike.com/hot/page/' + str(page)
    # The request is sent with an explicit User-Agent header.
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}

    # Five capture groups per match, in order: the <h2> text, the text after
    # 'er womenIcon">', the class="content" body, the ass="thumb" body and
    # the class="stats" body. re.S lets '.' span newlines in the markup.
    pattern = re.compile('<div .*?<h2>(.*?)</h2>.*?er womenIcon">(.*?)' +
                         '</div>.*?class="content">(.*?)</div></a>' +
                         '.*?ass="thumb">(.*?)</div><div class="stats">(.*?)' +
                         '</div>', re.S)

    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            soup = response.read().decode('utf-8')
        finally:
            # Always release the connection, even if read/decode fails.
            response.close()
    except urllib2.URLError as e:
        # Print whichever of the status code and reason the error carries.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    else:
        # Parsing only runs when the download succeeded.
        items = re.findall(pattern, soup)

        # Single-argument print(...) behaves the same as the Python 2 print
        # statement used by the rest of this script.
        print(type(items))
        print(items)
        # Guard against an empty result: indexing items[0] would raise
        # IndexError when the pattern matches nothing on the page.
        if items:
            print(items[0][0].encode('utf-8'))

  • 相关阅读:
    是否完全二叉搜索树 (30 分)
    链表去重
    关于堆的判断
    玩转二叉树
    hdu-2795 Billboard(线段树)
    线段树超级大模版
    博弈dp 以I Love this Game! POJ
    kuangbin 最小生成树
    Infinite Maze CodeForces
    Alice’s Stamps HDU
  • 原文地址:https://www.cnblogs.com/kongxc/p/7787941.html
Copyright © 2011-2022 走看看