zoukankan      html  css  js  c++  java
  • 吴裕雄 实战python编程(2)

    from urllib.parse import urlparse

    # Break a URL into its named components and show each one.
    url = 'http://www.pm25x.com/city/beijing.htm'
    o = urlparse(url)
    print(o)

    # Build the whole report first, then emit it with a single print call.
    component_lines = [
        "scheme={}".format(o.scheme),  # http
        "netloc={}".format(o.netloc),  # www.pm25x.com
        "port={}".format(o.port),      # None (the URL has no explicit port)
        "path={}".format(o.path),      # /city/beijing.htm
        "query={}".format(o.query),    # empty string
    ]
    print("\n".join(component_lines))

    import requests

    # Fetch a page and print its HTML decoded as GBK.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Override the encoding requests guessed so .text is decoded as GBK.
    html.encoding = "GBK"
    print(html.text)

    import requests
    import numpy as np

    # Download the page, split its HTML into lines, and print every line.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Override the encoding requests guessed so .text is decoded as GBK.
    html.encoding = "gbk"
    htmllist = html.text.splitlines()
    print(type(htmllist))      # <class 'list'>
    print(np.shape(htmllist))  # one-element tuple holding the number of lines
    for row in htmllist:
        print(row)

    import requests

    # Count how many lines of the page contain the keyword.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Override the encoding requests guessed so .text is decoded as GBK.
    html.encoding = "gbk"

    htmllist = html.text.splitlines()
    # Each matching line counts once, however many times the keyword occurs in it.
    n = sum("新概念" in row for row in htmllist)
    print("找到 {} 次!".format(n))

    import re

    # Compile the pattern once so the pattern object can be reused.
    pat = re.compile(r'[a-z]+')

    # match() only tries at the start of the string, so it stops before the digits.
    m = pat.match('tem12po')
    print(m)

    # match() returns None on failure; guard before touching the match object.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 0
        print(m.end())    # 3
        print(m.span())   # (0, 3)

    import re

    # Same match as with a compiled pattern, using the module-level helper.
    m = re.match(r'[a-z]+', 'tem12po')
    print(m)

    # re.match() returns None on failure; guard before touching the match object.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 0
        print(m.end())    # 3
        print(m.span())   # (0, 3)

    import re

    pat = re.compile(r'[a-z]+')

    # search() scans the whole string, so the leading digit is skipped.
    m = pat.search('3tem12po')
    print(m)  # <re.Match object; span=(1, 4), match='tem'> on Python 3.7+

    # search() returns None when nothing matches; guard before using the result.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 1
        print(m.end())    # 4
        print(m.span())   # (1, 4)

    import re

    # Collect every non-overlapping run of lowercase ASCII letters in the string.
    m = re.findall('[a-z]+', 'tem12po')
    print(m)  # ['tem', 'po']

    import re
    import requests

    # Print every e-mail address found in the page's HTML.
    # The dot between the domain and the TLD is escaped: an unescaped "." matches
    # any character, which would accept strings such as "user@hostXcom".
    # A raw string keeps the backslash intact for the regex engine.
    regex = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+')
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    emails = regex.findall(html.text)
    for email in emails:
        print(email)

  • 相关阅读:
    java内存回收机制
    scala学习
    [java实现]找一个数组的最大和的连续子数组(时间复杂度 O(n))
    linux 进程的创建
    linux中的进程和线程
    linux 文件系统
    gdb 调试程序
    makefile
    linux下的gcc编译器
    socket 网络编程
  • 原文地址:https://www.cnblogs.com/tszr/p/10061172.html
Copyright © 2011-2022 走看看