zoukankan      html  css  js  c++  java
  • python网络编程学习笔记(一)

    python网络编程学习笔记(一)

    python网络编程基础,第四版
    pycharm实现,python版本2.7.5

    第一部分 底层网络

    一、笔记

    #coding=utf-8
    第一章
    
    import socket,sys
    # Gopher client: connect to <host> on the gopher port, request
    # <filename>, and copy the server's reply to stdout.
    port=70
    print (len(sys.argv))
    for arg in sys.argv:
        print (arg)
    host=sys.argv[1]
    filename=sys.argv[2]

    s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    print ("old s is:",s)
    print (host)
    s.connect((host,port))
    print ("new s is:",s)

    # The selector line sent to a gopher server is terminated with CRLF.
    s.sendall(filename+"\r\n")

    # recv() returns an empty string once the server closes the connection.
    while 1:
        buf=s.recv(2048)
        if not len(buf):
            break
        sys.stdout.write(buf)
    

    加入错误处理

    import socket,sys
    # Gopher client with error handling: a failed host lookup during
    # connect() is reported and the script exits with status 1.
    port=70
    print (len(sys.argv))
    for arg in sys.argv:
        print (arg)
    host=sys.argv[1]
    filename=sys.argv[2]

    s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    print ("old s is:",s)
    print (host)

    try:
        s.connect((host, port))
    except socket.gaierror as e:
        # gaierror signals an address-resolution failure.
        print ("ERROR connection to server:%s" %e)
        sys.exit(1)
    # The selector line sent to a gopher server is terminated with CRLF.
    s.sendall(filename+"\r\n")

    # recv() returns an empty string once the server closes the connection.
    while 1:
        buf=s.recv(2048)
        if not len(buf):
            break
        sys.stdout.write(buf)
    
    文件接口类重写
    import socket,sys
    # Gopher client rewritten on top of the socket's file-like interface.
    port=70
    host=sys.argv[1]
    filename=sys.argv[2]

    s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.connect((host,port))
    # Buffer size 0: writes are passed to the socket without buffering.
    fd=s.makefile('rw',0)
    # The selector line sent to a gopher server is terminated with CRLF.
    fd.write(filename+"\r\n")


    for line in fd.readlines():
        sys.stdout.write(line)
    

    基本服务器操作

    import socket
    
    host=''
    port=80
    
    s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    print "Server is running on port %d;press ctrl-c to
        terminate."% port
    
    while 1:
        clientsock,clientaddr=s.accept()
        clientfile=clientsock.makefile('rw',0)
        clientfile.write("welcome,"+str(clientaddr)+"/n")
        clientfile.write("Please enter a string:")
        line=clientfile.readline().strip()
        clientfile.write("You entered %d characters.
    "%len(line))
        clientfile.close()
        clientsock.close()
    

    高级接口

    import gopherlib,sys
    # Fetch a gopher document through gopherlib and copy it to stdout.
    host=sys.argv[1]
    file=sys.argv[2]

    f=gopherlib.send_selector(file,host)
    sys.stdout.writelines(f.readlines())
    
    import urllib,sys
    # Fetch the same gopher document through urllib, using a gopher:// URL.
    host=sys.argv[1]
    file=sys.argv[2]

    gopher_url='gopher://%s%s'%(host,file)
    f=urllib.urlopen(gopher_url)
    sys.stdout.writelines(f.readlines())
    
    import urllib,sys
    # Download the URL given on the command line in 2048-byte chunks.
    f=urllib.urlopen(sys.argv[1])
    buf=f.read(2048)
    # An empty read marks the end of the document.
    while len(buf):
        sys.stdout.write(buf)
        buf=f.read(2048)
    

    第二章
    使用udp

    第三章 网络服务器

    import socket
    solist=[x for x in dir(socket) if x.startswith('SO')]
    solist.sort()
    for x in solist:
        print x
    

    第四章 域名系统

    import sys,socket
    
    result=socket.getaddrinfo(sys.argv[1],None)
    print result[0][4]
    
    import sys,socket
    
    result=socket.getaddrinfo(sys.argv[1],None)
    counter=0
    for item in result:
        print "%-2d:%s"%(counter,item[4])
        counter+=1
    
    P70
    import sys,socket
    result=socket.getaddrinfo(sys.argv[1],None,0,socket.SOCK_STREAM)
    counter=0
    for item in result:
        print "%-2d:%s"%(counter,item[4])
        counter+=1
    

    执行反向查询

    import sys,socket
    
    try:
        result=socket.gethostbyaddr(sys.argv[1])
    
        print "Primary hostname:"
        print " "+result[0]
    
        print "
    Addresses:"
        for item in result[2]:
            print " "+item
    
    except socket.herror,e:
        print "Couldn't look up name:",e
    

    正反向查询

    import sys,socket
    def getipaddrs(hostname):
        """Return the list of IP address strings *hostname* resolves to.

        Only SOCK_STREAM results are requested from getaddrinfo(); the
        address is the first element of each result's socket address.
        """
        addresses=[]
        for entry in socket.getaddrinfo(hostname,None,0,socket.SOCK_STREAM):
            sockaddr=entry[4]
            addresses.append(sockaddr[0])
        return addresses
    def gethostname(ipaddr):
        """Return the primary host name gethostbyaddr() reports for *ipaddr*."""
        primary_name,_aliases,_addresses=socket.gethostbyaddr(ipaddr)
        return primary_name
    
    try:
        hostname=gethostname(sys.argv[1])
        ipaddrs=getipaddrs(hostname)
    except socket.herror,e:
        print "NO host names available for %s;it may be normal"%sys.argv[1]
        sys.exit(0)
    except socket.gaierror,e:
        print "Got hostname %s,but it could not be forward-resolved:%s"%(hostname,str(e))
        sys.exit(1)
    
    if not sys.argv[1] in ipaddrs:
        print "GOt hostnae %s,but no forward lookup,"% hostname
        print "original IP %s did not appear in IP address list"% sys.argv[1]
        sys.exit(1)
    
    print "Validated hostname:",hostname
    

    获得完整域名:gethostname()获得主机名,getfqdn()获得完整域名,getaddrinfo()获得该域名对应的IP地址。

    import sys,socket
    
    def getipaddrs(hostname):
        """Return the IP address strings for *hostname* (SOCK_STREAM results only)."""
        entries=socket.getaddrinfo(hostname,None,0,socket.SOCK_STREAM)
        # Each entry is (family, socktype, proto, canonname, sockaddr).
        return [sockaddr[0] for (_family,_socktype,_proto,_canon,sockaddr) in entries]
    
    # Report this machine's host name, its fully-qualified name and the
    # IP addresses the host name resolves to.
    hostname=socket.gethostname()
    print "Host name:",hostname

    print "Fully-qualified name:",socket.getfqdn(hostname)
    try:
        # getipaddrs() is evaluated after "IP addresses:" has been written,
        # so a lookup failure is reported on the same output line.
        print "IP addresses:",",".join(getipaddrs(hostname))
    except socket.gaierror,e:
        print "Couldn't not get IP addresses:",e
    
    import sys,DNS
    query=sys.argv[1]
    DNS.DiscoverNameServers()
    
    reqobj=DNS.Request()
    
    answerobj=reqobj.req(name=query,qtrpe=DNS.Type.ANY)
    if not len(answerobj.answers):
        print "NOT found."
    for item in answerobj.answers:
        print "%-5s %s"%(item['typename'],item['data'])
    
    import sys,DNS
    
    def hierquery(qstring,qtype):
        """Query *qstring* for *qtype*, walking up the domain hierarchy.

        If the full name yields no record of the requested type, the
        leftmost label is removed and the query repeated. Returns the list
        of 'data' fields of the matching answers, or None when the name is
        down to a single label without a match.
        """
        name=qstring
        while True:
            reqobj=DNS.Request()  # a fresh request object per query
            try:
                answerobj=reqobj.req(name=name,qtype=qtype)
                answers=[x['data'] for x in answerobj.answers if x['type']==qtype]
            except DNS.Base.DNSError:
                answers=[]
            if len(answers):
                return answers
            remainder=name.split(".",1)
            if len(remainder)==1:
                return None
            # Retry with the parent domain.
            name=remainder[1]
    
    
    def findnameservers(hostname):
        """Return the NS records found for *hostname* or its closest parent domain."""
        ns_type=DNS.Type.NS
        return hierquery(hostname,ns_type)
    
    def getrecordsfromnameserver(qstring,qtype,nslist):
        """Query each server in *nslist* until one returns answers.

        Returns the answer list of the first server that has any, or an
        empty list once every server has been tried without success.
        """
        for ns in nslist:
            reqobj=DNS.Request(server=ns)
            try:
                answers=reqobj.req(name=qstring,qtype=qtype).answers
                if len(answers):
                    return answers
            except DNS.Base.DNSError:
                # This server failed; move on to the next one.
                pass
        # Only give up after the whole list has been exhausted.
        return []
    
    def nslookup(qstring,qtype,verbose=1):
        nslist=findnameservers(qstring)
        if nslist==None:
            raise RuntimeError,"Could not find nameserver to use."
        if verbose:
            print "using nameserver:",",".join(nslist)
        return getrecordsfromnameserver(qstring,qtype,nslist)
    
    if __name__=='__main__':
        query=sys.argv[1]
        DNS.DiscoverNameServers()
    
        answers=nslookup(query,DNS.Type.ANY)
        if not len(answers):
            print "not found."
        for item in answers:
            print "%-5s %s"%(item['typename'],item['data'])
    

    第五章
    超时的用法
    echoserver.py

    import socket,traceback
    
    host = ''
    port = 51432
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    while True:
        try:
            clientsock,clientaddr = s.accept()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
            continue
    
        try:
            print "Got connection from",clientsock.getpeername()
            while  True:
                data = clientsock.recv(4096)
                if not len(data):
                    break
                clientsock.sendall(data)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    
        try:
            clientsock.close()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
    
    
    import struct,sys
    
    def htons(num):
        """Pack *num* as a 16-bit unsigned integer in network byte order."""
        return struct.pack('!H',num)

    # Backward-compatible alias for the original, misspelled name.
    htones=htons

    def htonl(num):
        """Pack *num* as a 32-bit unsigned integer in network byte order."""
        return struct.pack('!I',num)

    def ntohs(data):
        """Unpack a 2-byte network-order string into an integer."""
        return struct.unpack('!H',data)[0]

    def ntohl(data):
        """Unpack a 4-byte network-order string into an integer."""
        return struct.unpack('!I',data)[0]

    def sendstring(data):
        """Return *data* prefixed with its length as a 32-bit network-order integer."""
        return htonl(len(data))+data
    
    print "Enter a string:"
    str=sys.stdin.readline().rstrip()
    
    print repr(sendstring(str))
    
    
    import socket,sys
    
    host,port = sys.argv[1:]
    
    results = socket.getaddrinfo(host,port,0,socket.SOCK_STREAM)
    
    for result in results:
        print "-"*60
    
        if result[0] == socket.AF_INET:
            print "Family: AF_INET"
        elif result[0] == socket.AF_INET6:
            print "Family: AF_INET6"
        else:
            print "Family:",result[0]
    
        if result[1] == socket.SOCK_STREAM:
            print "Socket Type: SOCK_STREAM"
        elif result[1] == socket.SOCK_DGRAM:
            print "Socket Type: SOCK_DGRAM"
    
        print "Protocol:",result[2]
        print "Canonical Name:",result[3]
        print "Socket Address:",result[4]
    

    先找ipv4,再找ipv6
    Connect Example with ipv6 Awareness ------------- ipv6connect.py

    import socket,sys
    
    def getaddrinfo_pref(host,port,socktype,familypreference=socket.AF_INET):
        """Return one getaddrinfo() result, preferring *familypreference*.

        The first result whose address family equals *familypreference*
        (IPv4 by default) is returned; if none matches, the first result
        of any family is returned instead.
        """
        candidates = socket.getaddrinfo(host,port,0,socktype)
        preferred = [entry for entry in candidates if entry[0] == familypreference]
        if preferred:
            return preferred[0]
        return candidates[0]
    
    host = sys.argv[1]
    port = 'http'
    
    c = getaddrinfo_pref(host,port,socket.SOCK_STREAM)
    print "Connecting to",c[4]
    
    s = socket.socket(c[0],c[1])
    s.connect(c[4])
    s.sendall("HEAD / HTTP/1.0
    
    ")
    
    while True:
        buf = s.recv(4096)
    
        if not len(buf):
            break
        sys.stdout.write(buf)
    
    
    Echo Server Bound to Specific Address
    bindserver.py
    
    import socket,traceback
    
    host = '127.0.0.1'
    port = 51423
    
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    
    while True:
        clientsock,clientaddr = s.accept()
    
        print "Got connection from",clientsock.getpeername()
    
        while True:
            data = clientsock.recv(4096)
            if not len(data):
                break
            clientsock.sendall(data)
    
        clientsock.close()
    
    poll()
    
    import socket,sys,select
    
    
    port = 51423
    host = 'localhost'

    # Spinner state: bar width, current marker position, and direction
    # of travel (+1 or -1).
    spinsize = 10
    spinpos = 0
    spindir = 1

    def spin():
        """Redraw the spinner on the current line and advance it one step.

        The marker moves back and forth between the two ends of the bar.
        """
        global spinsize,spinpos,spindir

        spinstr = '.' * spinpos + '|' + '.'*(spinsize-spinpos-1)
        # '\r' returns to the start of the line so the bar overwrites itself.
        sys.stdout.write('\r'+spinstr+' ')
        sys.stdout.flush()

        spinpos += spindir

        # Reverse direction when the marker runs off either end.
        if spinpos < 0:
            spindir = 1
            spinpos = 1
        elif spinpos >= spinsize:
            spinpos -= 2
            spindir = -1
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.connect((host,port))
    
    p = select.poll()
    
    p.register(s.fileno(),select.POLLIN | select.POLLERR | select.POLLHUP)
    
    while True:
        results = p.poll(50)
    
        if len(results):
            if results[0][1] == select.POLLIN:
                data = s.recv(4096)
                if not len(data):
                    print ("
    Remove end closed connection ; exiting.")
                    break
                sys.stdout.write("
    Received: " + data)
                sys.stdout.flush()
    
            else:
                print "
    Problem occurred exitng."
                sys.exit(0)
        spin()
    
    
    selectclient.py
    
    import socket,sys,select
    
    port = 51423
    host = 'localhost'

    # Spinner state: bar width, current marker position, and direction
    # of travel (+1 or -1).
    spinsize = 10
    spinpos = 0
    spindir = 1

    def spin():
        """Redraw the spinner on the current line and advance it one step.

        The marker moves back and forth between the two ends of the bar.
        """
        global spinsize,spinpos,spindir

        spinstr = '.' * spinpos + '|' + '.' *(spinsize - spinpos -1)
        # '\r' returns to the start of the line so the bar overwrites itself.
        sys.stdout.write('\r' + spinstr +' ')
        sys.stdout.flush()

        spinpos += spindir

        # Reverse direction when the marker runs off either end.
        if spinpos < 0:
            spindir = 1
            spinpos = 1
        elif spinpos >= spinsize:
            spinpos -= 2
            spindir = -1
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.connect((host,port))
    
    while True:
        infds,outfds,errfds = select.select([s],[],[s],0.05)
    
        if len(infds):
    
            data = s.recv(4096)
            if not len(data):
                print("
    Remote end closed connection; Exiting.")
                break
            sys.stdout.write("
    Received: " + data)
            sys.stdout.flush()
    
        if len(errfds):
            print "
    Problen occurred; exiting."
            sys.exit(0)
        spin()
    

    二、TCP通信方式

    服务端:

    #coding=utf-8
    from socket import*
    
    # Server side: listen for socket connections and echo replies back.
    myHost=''  # empty string: accept connections on any local address
    myPort=50007

    sockobj=socket(AF_INET,SOCK_STREAM)  # create a TCP socket object
    sockobj.bind((myHost,myPort))  # bind it to the server port
    sockobj.listen(5)  # listen, allowing up to 5 pending connections

    while True:  # keep serving until the process is killed
        connection,address=sockobj.accept()  # wait for the next client
        print('Server connected by:',address)  # the connection is a new socket
        data=connection.recv(1024)  # read what the client sent
        while data:  # empty data: the client closed its side
            # Replies must be bytes, hence the b'' prefix.
            connection.send(b'Echo get your message:'+data)
            data=connection.recv(1024)
        connection.close()
    

    客户端:

    #coding=utf-8
    import socket
    
    #客户端
    import sys
    from socket import *
    # Client side: send each message to the echo server and print the reply.
    serverHost='localhost'
    serverPort=50007

    message=[b'hello network world']

    # Optional arguments: server host, then the messages to send.
    args=sys.argv[1:]
    if len(args)>0:
        serverHost=args[0]
        if len(args)>1:
            message=(text.encode() for text in args[1:])

    sockobj=socket(AF_INET,SOCK_STREAM)
    sockobj.connect((serverHost,serverPort))

    for line in message:
        sockobj.send(line)
        data=sockobj.recv(1024)  # read up to 1024 bytes of reply
        print('Client received:',data)

    sockobj.close()
    

    三、UDP通信方式

    UDP请求端:

    #coding=utf-8
    import socket,sys
    
    host=sys.argv[1]
    textpost=sys.argv[2]
    
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    try:
        port=int(textpost)
    except ValueError:
        print "输入错入"
        port=socket.getservbyname(textpost,'udp')
    
    s.connect((host,port))
    print "Enter data to transmit: "
    data=sys.stdin.readline().strip()
    s.sendall(data)
    print "Looking for replies."
    while(1):
        buf=s.recv(2048)
        if not len(buf):
            break
        sys.stdout.write(buf)
    UDP应答端:
    #coding=utf-8
    import socket,traceback
    
    host=''
    port=54132
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    
    while 1:
        try:
            message,address=s.recvfrom(8192)
            print "Got data from",address
            s.sendto(message,address)
        except(KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    UDP查询时间
    服务端:
    #coding=utf-8
    import socket,traceback,time,struct
    
    host=''
    port=51432
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    
    while 1:
        try:
            message,address=s.recvfrom(8192)
            print message,address
            secs=int(time.time())
            secs-=60*60*24
            secs+=220898800
            reply=struct.pack("!I",secs)
            s.sendto(reply,address)
        except(KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    

    客户端:

    #coding=utf-8
    import socket,sys,struct,time
    hostname='localhost'
    port=51432
    
    host=socket.gethostbyname(hostname)
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.sendto('',(host,port))
    
    print "Loking for replies"
    buf=s.recvfrom(2048)[0]
    if len(buf)!=4:
        print "Wrong-size reply %d:%s"%(len(buf),buf)
        sys.exit(1)
    
    secs=struct.unpack("!I",buf)[0]
    secs-=220898800
    print time.ctime(int(secs))
    超时:
    #coding=utf-8
    import socket,traceback
    
    host = ''
    port = 51432
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    while True:
        try:
            clientsock,clientaddr = s.accept()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
            continue
    
        try:
            print "Got connection from",clientsock.getpeername()
            while  True:
                data = clientsock.recv(4096)
                if not len(data):
                    break
                clientsock.sendall(data)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    
        try:
            clientsock.close()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
    四、ftp
    #coding=utf-8
    #自动抓取并打开远程文件文件
    import os,sys
    from getpass import getpass
    from ftplib import FTP
    
    # Download one file from an FTP site in binary mode and optionally
    # open it afterwards.
    nonpassive=False
    filename='monkeys.jpg'
    dirname='.'
    sitename='ftp.rmi.net'
    userinfo=('lutz',getpass('pwd?'))
    if len(sys.argv)>1:filename=sys.argv[1]

    print('Connection...')
    connection=FTP(sitename)
    connection.login(*userinfo)
    connection.cwd(dirname)
    if nonpassive:
        connection.set_pasv(False)

    print('Downloading...')
    # The with-block closes the local file even if the transfer fails.
    with open(filename,'wb') as localfile:
        # The FTP command is "RETR <name>": the space is part of the syntax.
        connection.retrbinary('RETR '+filename,localfile.write,1024)
    connection.quit()

    if input('Open file?') in ['Y','y']:
        from PP4E.System.Media.playfile import playfile
        playfile(filename)
    

    五、广播

    接收端:

    #coding=utf-8
    import socket,traceback
    
    host = ''
    port = 51423
    
    s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
    s.bind((host,port))
    
    while True:
        try:
            message,address =s.recvfrom(8192)
            print "Got data from ",address
            s.sendto("I am here",address)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    

    发送端:

    #coding=utf-8
    import socket,sys
    dest = ('<broadcast>',51423)
    
    s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
    s.sendto("Hello",dest)
    
    print "Looking for replies; press Ctrl-C to stop."
    
    while True:
        (buf,address) = s.recvfrom(2048)
        if not len(buf):
            break
        print "Received from %s: %s" % (address,buf)
    

    第二部分web Service

    一、web客户端访问

    urllib2扩展性更好
    1.下载Web界面
    2.在远程HTTP服务器上验证
    3.提交表单(form)数据
    4.处理错误
    5.与非HTTP协议通信

    1.下载Web界面
    (1)

    #coding=utf-8
    import sys,urllib2
    
    # Download the URL given on the command line and write it to stdout
    # in 1024-byte chunks.
    req=urllib2.Request(sys.argv[1])
    fd=urllib2.urlopen(req)
    data=fd.read(1024)
    # An empty read marks the end of the document.
    while len(data):
        sys.stdout.write(data)
        data=fd.read(1024)
    

    sys.stdout 是标准输出文件。write就是往这个文件写数据。
    合起来就是打印数据到标准输出。类似print

    运行结果:

    D:pythonpython.exe E:/code/python/unit6/dump_page.py
    http://www.example.com

    <!doctype html>
    <html>
    <head>
        <title>Example Domain</title>
    
        <meta charset="utf-8" />
        <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <style type="text/css">
        body {
            background-color: #f0f0f2;
            margin: 0;
            padding: 0;
            font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    
        }
        div {
            width: 600px;
            margin: 5em auto;
            padding: 50px;
            background-color: #fff;
            border-radius: 1em;
        }
        a:link, a:visited {
            color: #38488f;
            text-decoration: none;
        }
        @media (max-width: 700px) {
            body {
                background-color: #fff;
            }
            div {
                width: auto;
                margin: 0 auto;
                border-radius: 0;
                padding: 1em;
            }
        }
        </style>
    </head>
    
    <body>
    <div>
        <h1>Example Domain</h1>
        <p>This domain is established to be used for illustrative examples in documents. You may use this
        domain in examples without prior coordination or asking for permission.</p>
        <p><a href="http://www.iana.org/domains/example">More information...</a></p>
    </div>
    </body>
    </html>
    

    Process finished with exit code 0

    (2)

    #coding=utf-8
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    fd=urllib2.urlopen(req)
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"%(key,value)
    

    运行结果如下:
    D:pythonpython.exe E:/code/python/unit6/dump_info.py http://httpd.apache.org/dev
    Retrieved http://httpd.apache.org/dev/
    content-length=8870
    accept-ranges=bytes
    vary=Accept-Encoding
    server=Apache/2.4.7 (Ubuntu)
    last-modified=Wed, 25 Jan 2017 14:38:55 GMT
    connection=close
    etag="22a6-546ec313cb061"
    date=Fri, 17 Mar 2017 06:29:52 GMT
    content-type=text/html

    Process finished with exit code 0

    注:从geturl()得到的值与传入Request的对象不同,结尾处多了一条斜线,远程服务器做了一个Http转向,urllib自动跟随了转向。
    其他行显示Http的header信息;

    2.在远程HTTP服务器上验证

    (1)

    #coding=utf-8
    import sys,urllib2,getpass
    
    class TerminalPassword(urllib2.HTTPPasswordMgr):
        """Password manager that asks on the terminal when nothing is stored."""

        def find_user_password(self, realm, authuri):
            """Return a (username, password) pair for *realm* at *authuri*.

            Credentials already known to the base class are returned as
            they are. Otherwise the user name is read from stdin and the
            password through getpass.
            """
            ret=urllib2.HTTPPasswordMgr.find_user_password(self,realm,authuri)

            if ret[0] is None and ret[1] is None:
                sys.stdout.write("Login reauired for %s at %s\n" % (realm,authuri))
                sys.stdout.write("Username: ")
                username = sys.stdin.readline().rstrip()
                password = getpass.getpass().rstrip()
                return (username, password)
            else:
                return ret
    req = urllib2.Request(sys.argv[1])
    opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(TerminalPassword()))
    response = opener.open(req)
    print response.read()
    

    扩展urllib2.HTTPPasswordMgr类,允许程序在需要的时候向操作员询问用户名和密码。
    build_opener:允许指定额外的处理程序;代码需要支持认证,所以把HTTPBasicAuthHandler加入处理链。

    3.提交表单(form)数据
    GET方法:把表单数据编码至url,在给出请求的页面后,加一个问号,接着是表单的元素。每个键和值对用“&”分割,有些字符需要被避免。不适合数据量比较大的地方。
    (1)

    代码:
    #coding=utf-8
    import sys,urllib2
    
    # Download the URL given on the command line and write it to stdout
    # in 1024-byte chunks.
    req=urllib2.Request(sys.argv[1])
    fd=urllib2.urlopen(req)
    chunk=fd.read(1024)
    # An empty read marks the end of the document.
    while len(chunk):
        sys.stdout.write(chunk)
        chunk=fd.read(1024)
    

    sys.stdout 是标准输出文件。write就是往这个文件写数据。
    合起来就是打印数据到标准输出。类似print

    运行结果:
    D:pythonpython.exe E:/code/python/unit6/dump_page.py http://weixin.sogou.com/weixin?p=01030402&query=博客园&type=2&ie=utf8

    <!doctype html>

    注:必须给url加上引号 (2) 代码:
    #coding=utf-8
    import sys,urllib2,urllib
    
    def addGETdata(url,data):
        """Return *url* followed by '?' and the URL-encoded form *data*."""
        query_string=urllib.urlencode(data)
        return url+'?'+query_string
    
    zipcode=sys.argv[1]
    url=addGETdata('http://www.weather.com.cn/cgi-bin/findweather/getForecast',[('query',zipcode)])
    
    print "using URL",url
    req=urllib2.Request(url)
    fd=urllib2.urlopen(req)
    while 1:
        data=fd.read(1024)
        if not len(data):
            break
        sys.stdout.write(data)
    

    注:函数addGETdata(url,data)负责在url结尾添加所有的数据。在内部,他在URL和通过urllib.urlencode()得到的数据间添加问号。

    POST方法:单独部分发送。URL永远不会被修改,附加信息通过第二个参数传递给urlopen().
    (3)
    代码:

    #coding=utf-8
    import sys,urllib2,urllib
    
    # Submit the code given on the command line as POST form data: the
    # encoded form is passed to urlopen() and the URL itself is unchanged.
    zipcode=sys.argv[1]
    url='http://www.wunderground.com/cgi-bin/findweather/getForcecast'
    data=urllib.urlencode([('query',zipcode)])
    req=urllib2.Request(url)
    fd=urllib2.urlopen(req,data)
    # 'data' is reused below as the read buffer.
    data=fd.read(1024)
    while len(data):
        sys.stdout.write(data)
        data=fd.read(1024)
    

    4.处理错误

    (1)
    代码:

    #coding=utf-8
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.URLError,e:
        print "Error reteiveving data:",e
        sys.exit(1)
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"% (key,value)
    

    运行结果:

    D:pythonpython.exe E:/code/python/unit6/error_basic.py
    https://www.wunderground.com/cgi-bin/findweather/getForcecast
    Error reteiveving data: HTTP Error 404: Not Found

    Process finished with exit code 1

    (2)
    代码:

    #coding=utf-8
    # import sys,urllib2
    #
    # req=urllib2.Request(sys.argv[1])
    #
    # try:
    #     fd=urllib2.urlopen(req)
    # except urllib2.URLError,e:
    #     print "Error reteiveving data:",e
    #     sys.exit(1)
    # print "Retrieved",fd.geturl()
    # info=fd.info()
    # for key,value in info.items():
    #     print "%s=%s"% (key,value)
    
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.HTTPError,e:
        print "Error reteiveving data:",e
        print "Server error document follows:
    "
        print e.read
        sys.exit(1)
    except urllib2.URLError,e:
        print "Error retriveving data",e
        sys.exit(2)
    
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"% (key,value)
    

    运行结果:

    D:pythonpython.exe E:/code/python/unit6/error_basic.py
    https://www.wunderground.com/cgi-bin/findweather/getForcecast
    Error reteiveving data: HTTP Error 404: Not Found
    Server error document follows:

    <bound method _fileobject.read of <socket._fileobject object at
    0x0216A5B0>>

    Process finished with exit code 1

    注:如果产生了一个HTTPError的实例,会捕获异常并打印细节;否则,如果是urllib2.URLError类的实例,会显示一条URLError信息。

    读取数据错误:
    通信错误,会使socket模块调用read()函数时发生socket.error;(会通过系统层传递)
    没有通信情况下发送的文档被删节;

    (3)
    代码:

    #coding=utf-8
    import sys,urllib2,socket
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.HTTPError,e:
        print "Error retrieving data:",e
        print "Sever error document follows:
    "
        print e.read()
        sys.exit(1)
    except urllib2.URLError,e:
        print "Error retrieving data:",e
        sys.exit(2)
    
    print "Retrieved",fd.geturl()
    
    bytesread=0
    
    while 1:
        try:
            data=fd.read(1024)
        except socket.error,e:
            print "Error reading data:",e
            sys.exit(3)
    
        if not len(data):
            break
        bytesread+=len(data)
        sys.stdout.write(data)
    
        if fd.info().has_key('Content-Length') and long(fd.info()['Content-Length'])!=long(bytesread):
            print "Excepted a document of size %d,but read %d bytes"%(long(fd.info()['Content-Length']),bytesread)
            sys.exit(4)
    

    运行结果:

    
    > D:pythonpython.exe E:/code/python/unit6/erroe_all.py
    > https://www.wunderground.com/cgi-bin/findweather/getForcecast
    > Error retrieving data: HTTP Error 404: Not Found
    > Sever error document follows:
    > 
    > 
    > <!DOCTYPE html>
    > <!--[if IE 9]><html class="no-js ie9"> <![endif]-->
    > <!--[if gt IE 9]><!--> <html class="no-js "> <!--<![endif]-->
    > 	<head>
    > 		<title>Error | Weather Underground</title>
    > 		<link href="//icons.wxug.com/" rel="dns-prefetch" />
    > 		<link href="//api-ak.wunderground.com/" rel="dns-prefetch" />
    > <meta charset="utf-8">
    > <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    

    二、解析html和xhtml

    第七章 解析Html 和XHtml p151-p168
    1.提取标题
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    import sys
    
    class TitleParser(HTMLParser):
        """HTML parser that collects the text inside the <title> element."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.readingtitle=0  # 1 while inside <title>...</title>
            self.title=''        # title text collected so far

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag != 'title':
                return
            self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title while inside <title>."""
            if not self.readingtitle:
                return
            self.title = self.title + data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag != 'title':
                return
            self.readingtitle = 0

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    fd = open(sys.argv[1])
    tp = TitleParser()
    tp.feed(fd.read())
    print "Title is:",tp.gettitle()
    

    运行结果:

    D:pythonpython.exe E:/code/python/unit7/basic_title.py
    E:/code/python/unit7/faqs.html
    Title is: Appendix?B. MySQL 5.6 Frequently Asked Questions

    Process finished with exit code 0

    注:从表中摘取数据,或

    2.改进
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys
    
    class TitleParser(HTMLParser):
        """Title collector that also expands named entity references."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.readingtitle=0  # 1 while inside <title>...</title>
            self.title=''        # title text collected so far

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag != 'title':
                return
            self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title while inside <title>."""
            if not self.readingtitle:
                return
            self.title = self.title + data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag != 'title':
                return
            self.readingtitle = 0

        def handle_entityref(self, name):
            """Feed the entity's replacement text to handle_data().

            Unknown entities are passed through in their '&name;' form.
            """
            self.handle_data(entitydefs.get(name,'&'+name+';'))

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    fd = open(sys.argv[1])
    tp = TitleParser()
    tp.feed(fd.read())
    print "Title is:",tp.gettitle()
    

    etitle.html

    <!DOCTYPE html>
    <html >
    <head>
        <title>Document Title &amp;Intro</title>
    </head>
    <body>
    this is my text.
    </body>
    </html>
    

    运行结果一:

    D:pythonpython.exe E:/code/python/unit7/basic_title.py
    E:/code/python/unit7/etitle.html
    Title is: Document Title Intro

    Process finished with exit code 0
    运行结果二:

    D:pythonpython.exe E:/code/python/unit7/etitle.py
    E:/code/python/unit7/etitle.html
    Title is: Document Title &Intro

    Process finished with exit code 0

    当一个实体出现时,代码检查该实体是否可以识别:可以,则转换为相应的值;否则,保留输入流中的原文。

    3.转换字符参考
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys
    
    class TitleParser(HTMLParser):
        """Title collector that expands entity and character references."""

        def __init__(self):
            self.title=''        # title text collected so far
            self.readingtitle=0  # 1 while inside <title>...</title>
            HTMLParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag =='title':
                self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title while inside <title>."""
            if self.readingtitle:
                self.title += data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag == 'title':
                self.readingtitle = 0

        def handle_entityref(self, name):
            """Feed the entity's replacement text to handle_data().

            Unknown entities are passed through in their '&name;' form.
            """
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&'+name+';')

        def handle_charref(self, name):
            """Feed the character for a numeric reference to handle_data().

            *name* is the text between '&#' and ';': decimal digits, or
            'x'/'X' followed by hexadecimal digits. Malformed references
            and code points outside 1..255 are ignored.
            """
            try:
                if name[:1] in ('x','X'):
                    charnum=int(name[1:],16)
                else:
                    charnum=int(name)
            except ValueError:
                return
            # chr() covers the single-byte range only, so stop at 255.
            if charnum<1 or charnum>255:
                return
            self.handle_data(chr(charnum))

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    # Parse the HTML file named on the command line and print its title.
    # The with-block closes the file even if reading or parsing raises.
    with open(sys.argv[1]) as fd:
        tp = TitleParser()
        tp.feed(fd.read())
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("Title is: %s" % tp.gettitle())
    

    4.处理不均衡的标签
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys,re
    
    class TitleParser(HTMLParser):
        """HTML parser that tolerates unbalanced tags.

        A stack of open tags (``taglevels``) is maintained.  Text inside the
        tags listed in ``handledtags`` is gathered in ``self.data`` and
        reported when the tag is closed, either by its own end tag or
        implicitly by a later tag.
        """

        def __init__(self):
            self.taglevels=[]
            self.handledtags=['title','ul','li']
            self.processing=None
            # Initialised here so cleanse() and gettitle() are safe to call
            # before any handled tag has been seen.
            self.data = ''
            self.title = ''
            HTMLParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            """Push ``tag`` on the stack and start gathering text if it is handled."""
            # The same tag opening again directly inside itself (e.g. <li>
            # after an unclosed <li>) implicitly closes the previous one.
            if self.taglevels and self.taglevels[-1] == tag:
                self.handle_endtag(tag)

            self.taglevels.append(tag)
            if tag in self.handledtags:
                self.data = ''
                self.processing = tag
                if tag == 'ul':
                    print("List start")

        def handle_data(self, data):
            """Append text to the buffer while a handled tag is being processed."""
            if self.processing:
                self.data += data

        def handle_endtag(self, tag):
            """Close ``tag`` and every tag that was opened after it."""
            # An end tag with no matching open tag is ignored.
            if tag not in self.taglevels:
                return

            # Unwind the stack down to and including the matching start tag.
            while self.taglevels:
                starttag = self.taglevels.pop()

                if starttag in self.handledtags:
                    self.finishprocessing(starttag)

                if starttag == tag:
                    break

        def cleanse(self):
            """Collapse every run of whitespace in the buffer to one space."""
            # Raw string with \s: the pattern 's+' would match the letter "s".
            self.data = re.sub(r'\s+', ' ', self.data)

        def finishprocessing(self, tag):
            """Report the text gathered for ``tag`` and stop gathering."""
            self.cleanse()
            if tag == 'title' and tag == self.processing:
                # Remember the title so gettitle() has something to return.
                self.title = self.data
                print("Dom title %s" % self.data)
            elif tag == 'ul':
                print("List ended")
            elif tag == 'li' and tag == self.processing:
                print("List item %s" % self.data)
            self.processing = None

        def gettitle(self):
            """Return the last title reported, or '' if none was seen."""
            return self.title
    

    处理特殊值,如果在映射表中有对应的,即采用映射的值,否则为字面值

     def handle_entityref(self, name):
            """Handle a named entity reference such as ``&amp;``.

            A name found in ``entitydefs`` is replaced by its mapped text;
            an unknown name is passed on literally as ``&name;``.
            """
            # ``in`` replaces dict.has_key(), which is deprecated in Python 2
            # and no longer exists in Python 3.
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&' + name + ';')
    
        def handle_charref(self, name):
            """Convert a numeric character reference to a character.

            ``name`` is the reference without ``&#`` and ``;``: decimal
            digits, or ``x``/``X`` followed by hex digits.  Malformed
            references and values outside 1..255 are ignored.
            """
            try:
                if name[:1] in ('x', 'X'):
                    # Hexadecimal form, e.g. "x41" for &#x41;
                    charnum = int(name[1:], 16)
                else:
                    charnum = int(name)
            except ValueError:
                return

            if charnum < 1 or charnum > 255:
                return

            self.handle_data(chr(charnum))
    
    # Parse the HTML file named on the command line; the parser prints what
    # it finds.  The with-block closes the file even if parsing raises.
    with open(sys.argv[1]) as fd:
        tp = TitleParser()
        tp.feed(fd.read())
    

    运行结果:

    D:\python\python.exe E:/code/python/unit7/4un.py
    E:/code/python/unit7/4un.html
    Dom title DOCTYPE Title & Intro?
    List start
    List item First List item
    List item second list item
    List item second list item
    List ended

    Process finished with exit code 0

    5.一个可以实际工作的例子

    三、XML和XML-RPC

    P169-p190
    处理XML文档有两种方式:基于树(tree)和基于事件(event)。基于事件的解析器在扫描文档的过程中产生事件,由程序对这些事件作出响应。
    8.2 使用Dom
    代码:

    #coding=utf-8
    from xml.dom import minidom,Node
    
    def scanNode(node,level=0):
        """Print the class name of ``node`` and, recursively, of its descendants.

        Element nodes also show their tag name.  Each line is indented by
        four spaces per ``level``.
        """
        msg = node.__class__.__name__
        if node.nodeType == Node.ELEMENT_NODE:
            msg += ",tag" + node.tagName
        # Built as one string so the output is the same under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % (" " * level * 4, msg))
        # hasChildNodes is a method; without the call the bound method
        # object itself is tested, which is always true.
        if node.hasChildNodes():
            for child in node.childNodes:
                scanNode(child, level + 1)
    
    
    # Load the sample document and dump its node tree from the root down.
    doc = minidom.parse("Sample.xml")
    scanNode(doc, 0)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un1.py

     Document
    >      Element,tagbook
    >          Text
    >          Element,tagtitle
    >              Text
    >          Text
    >          Element,tagauthor
    >              Text
    >              Element,tagname
    >                  Text
    >                  Element,tagfirst
    >                      Text
    >                  Text
    >                  Element,taglast
    >                      Text
    >                  Text
    >              Text
    >              Element,tagaffiliation
    >                  Text
    >              Text
    >          Text
    >          Element,tagchapter
    >              Text
    >              Element,tagtitle
    >                  Text
    >              Text
    >              Element,tagpara
    >                  Text
    >                  Element,tagcompany
    >                      Text
    >                  Text
    >              Text
    >          Text
    

    Process finished with exit code 0

    sample.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <book>
        <title> Sample XML Thing </title>
        <author>
            <name>
                <first>Benjamin</first>
                <last>Smith</last>
            </name>
            <affiliation>Springy Widgets,Inc.</affiliation>
        </author>
    
        <chapter number = "1">
            <title>First chapter</title>
            <para>
                I think widgets are great.you should buy lots
                of them from <company>Springy widgets,Inc</company>
            </para>
        </chapter>
    </book>
    

    2.使用dom完全解析
    代码:

    #coding=utf-8
    """
    将XML以文本形式重新格式化输出
    1.使用Node的节点类型,判断下一步如何处理
    2.对不同的节点名(tagName)进行相应的处理
    """
    from xml.dom import minidom, Node
    import re, textwrap
    
    
    class SampleScanner:
        """Print the sample book document as formatted plain text.

        The node type decides whether a node is processed at all, and the
        element tag name selects the handler for each part of the document.
        """

        def __init__(self, doc):
            """Process every top-level <book> element of ``doc``."""
            for child in doc.childNodes:
                # Only <book> elements are handled; everything else is skipped.
                if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                    self.handleBook(child)

        def gettext(self, nodelist):
            """Return the text found in ``nodelist`` with whitespace collapsed.

            Text nodes contribute their text directly; any other node that
            has children is searched recursively.
            """
            retlist = []
            for node in nodelist:
                if node.nodeType == Node.TEXT_NODE:
                    retlist.append(node.wholeText)
                # hasChildNodes is a method; without the call the bound
                # method object itself is tested, which is always true.
                elif node.hasChildNodes():
                    retlist.append(self.gettext(node.childNodes))

            # Raw string with \s: the pattern "s+" would match the letter "s".
            return re.sub(r"\s+", " ", "".join(retlist))

        def handleBook(self, node):
            """Handle a <book> element.

            Non-element children are ignored.  <title> is printed directly;
            <author> and <chapter> are passed to their own handlers.
            """
            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "title":
                    print("Book title is : %s" % self.gettext(child.childNodes))
                elif child.tagName == "author":
                    self.handleAuthor(child)
                elif child.tagName == "chapter":
                    self.handleChapter(child)

        def handleAuthor(self, node):
            """Handle an <author> element.

            Non-element children are ignored.  <name> is passed to
            handleAuthorName; the text of <affiliation> is printed.
            """
            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "name":
                    self.handleAuthorName(child)
                elif child.tagName == "affiliation":
                    print("Author affiliation: %s" % self.gettext([child]))

        def handleAuthorName(self, node):
            """Print the author's name from the <last> and <first> descendants."""
            surname = self.gettext(node.getElementsByTagName("last"))
            givenname = self.gettext(node.getElementsByTagName("first"))

            print("Author Name:%s %s " % (surname, givenname))

        def handleChapter(self, node):
            """Handle a <chapter> element.

            Prints a heading built from the ``number`` attribute and the
            chapter title, then formats each <para> child.  Non-element
            children are ignored.
            """
            print("*** Start of Chapter %s,%s" % (
                node.getAttribute("number"),
                self.gettext(node.getElementsByTagName("title"))))

            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "para":
                    self.handlePara(child)

        def handlePara(self, node):
            """Print the text of a <para> element, wrapped by textwrap.fill."""
            paratext = self.gettext([node])
            paratext = textwrap.fill(paratext)
            print(paratext)
    
    
    doc = minidom.parse("Sample.xml")
    SampleScanner(doc)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un2.py
    Book title is : Sample XML Thing
    Author Name:Smith Benjamin
    Author affiliation: Springy Widgets,Inc.
    *** Start of Chapter 1,First chapter
    I think widgets are great.you should buy lots of them from Springy
    widgets,Inc

    Process finished with exit code 0

    3.使用Dom产生文档
    代码:

    #coding=utf-8
    """
    使用minidom生成XML
    1.创建Element,createElement
    2.添加子节点,appendChild
    3.创建Text,createTextNode
    4.创建属性,createAttribute
    """
    from xml.dom import minidom,Node
    
    # Create the Document
    doc = minidom.Document()
    # Create the <book> root element
    book = doc.createElement("book")
    doc.appendChild(book)
    # Create the book <title>
    title = doc.createElement("title")
    text = doc.createTextNode("Sample XML Thing")
    title.appendChild(text)
    book.appendChild(title)
    # Create the <author> element
    author = doc.createElement("author")
    # Create <name> with its <first> and <last> children
    name = doc.createElement("name")
    first = doc.createElement("first")
    first.appendChild(doc.createTextNode("Benjamin"))
    name.appendChild(first)

    last = doc.createElement("last")
    last.appendChild(doc.createTextNode("Smith"))
    name.appendChild(last)

    author.appendChild(name)
    book.appendChild(author)
    # <author> is complete

    # Create the <chapter> element
    chapter = doc.createElement("chapter")
    chapter.setAttribute("number","1")
    title = doc.createElement("title")
    title.appendChild(doc.createTextNode("Fisrt Chapter"))
    chapter.appendChild(title)

    para = doc.createElement("para")
    # The text must be a single-line literal; a newline inside a quoted
    # string is a syntax error.
    para.appendChild(doc.createTextNode(
        "I think widgets are great.you should buy lots of them from"))
    company = doc.createElement("company")
    company.appendChild(doc.createTextNode("Springy widgets,Inc"))
    para.appendChild(company)

    chapter.appendChild(para)
    # <chapter> is complete
    book.appendChild(chapter)
    # <book> is complete

    print(doc.toprettyxml(indent = " "))
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un3.py

    <?xml version="1.0" ?>
    <book>
     <title>Sample XML Thing</title>
     <author>
      <name>
       <first>Benjamin</first>
       <last>Smith</last>
      </name>
     </author>
     <chapter number="1">
      <title>Fisrt Chapter</title>
      <para>
       I think widgets are great.you should buy lots of them from
       <company>Springy widgets,Inc</company>
      </para>
     </chapter>
    </book>
    

    Process finished with exit code 0

    4.dom类型参考

    8.3使用xml-rpc
    5.
    代码:

    #coding=utf-8
    import xmlrpclib
    # NOTE(review): this URL looks like a blog article rather than an XML-RPC
    # endpoint -- confirm the intended server address.
    url='http://liandesinian.blog.51cto.com/7737219/1565474'
    s=xmlrpclib.ServerProxy(url)
    catdata=s.meerkat.getCategories()
    # Gather the category titles and print them in sorted order, one per line.
    cattiles=sorted([item['title'] for item in catdata])
    for item in cattiles:
        print(item)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un6.py

    Process finished with exit code 0

    代码:

    #coding=utf-8
    import xmlrpclib,sys,textwrap
    
    class NewsCat:
        """One news category: keeps the ``id`` and ``title`` of a category record."""

        def __init__(self,catdata):
            # catdata is indexed by the keys 'id' and 'title'.
            self.id=catdata['id']
            self.title=catdata['title']

        def __cmp__(self, other):
            """Order categories by comparing their titles."""
            mine, theirs = self.title, other.title
            return cmp(mine, theirs)
    
    class NewsSource:
        """Interactive text browser for news categories and items served over XML-RPC."""

        def __init__(self,url='http://www.oreillynet.com/meerkat/xml-rpc/server.php'):
            """Connect to the server at ``url`` and load its categories."""
            self.s=xmlrpclib.ServerProxy(url)
            self.loadcats()

        def loadcats(self):
            """Fetch the category list and store it, sorted, in ``self.cats``."""
            print("Loading categories....")
            catdata=self.s.meerkat.getCategories()
            self.cats=[NewsCat(item) for item in catdata]
            self.cats.sort()

        def displaycats(self):
            """Write the numbered category titles to stdout, three per row."""
            for count, item in enumerate(self.cats, 1):
                sys.stdout.write("%2d:%20.20s"%(count,item.title))
                if count%3==0:
                    sys.stdout.write("\n")
            # Terminate the listing with a newline when anything was shown.
            if self.cats:
                sys.stdout.write("\n")

        def promotcat(self):
            """Show the categories and read the user's choice.

            Returns the zero-based index of the selected category; exits the
            program when the user enters ``q``.
            """
            self.displaycats()
            sys.stdout.write("select a catgory or q to quit")
            selection = sys.stdin.readline().strip()
            if selection == 'q':
                sys.exit(0)
            return int(selection) - 1

        def dispact(self, cat):
            """Fetch the items of category ``cat`` and let the user browse them.

            Returns when the category is empty or the user enters ``q``.
            """
            # NOTE(review): ``cat`` is passed to the server unchanged, yet the
            # only caller passes the list index from promotcat(); presumably
            # self.cats[cat].id is wanted -- confirm.
            # NOTE(review): the 'data' request key (and item['data'] in
            # dispitem) may be meant as a date field -- confirm against the
            # server API.
            items = self.s.meerkat.getItems({'category': cat,
                                             'ids': 1,
                                             'descriptions': 1,
                                             'categories': 1,
                                             'channels': 1,
                                             'data': 1,
                                             'num_items': 15})
            if not len(items):
                print("Sorry,no items in that category.")
                sys.stdout.write("Press Enter to continue:")
                sys.stdin.readline()
                return
            while 1:
                # dispitemsummary prints by itself and returns nothing, so its
                # result must not be printed.
                self.dispitemsummary(items)
                sys.stdout.write("select a catgory or q to quit")
                selection = sys.stdin.readline().strip()
                if selection=='q':
                    return

                self.dispitem(items[int(selection)-1])

        def dispitemsummary(self, items):
            """Print one numbered line with the title of each item."""
            for counter, item in enumerate(items, 1):
                print("%2d:%s" % (counter, item['title']))

        def dispitem(self, item):
            """Print the details of one item and wait for the user to press Enter."""
            print("---%s---" % item['title'])
            print("Posted on %s" % item['data'])
            print("Description:")
            print(textwrap.fill(item['description']))
            print("\nlink: %s" % item['link'])
            sys.stdout.write("\nPress Enter to continue: ")
            sys.stdin.readline()


    # Main loop: pick a category, browse it, repeat until the user quits.
    # Guarded so that importing this module does not contact the server.
    if __name__ == "__main__":
        n = NewsSource()
        while 1:
            cat = n.promotcat()
            n.dispact(cat)
    
    本性的苏醒,往往在遭遇真实之后。
  • 相关阅读:
    tyvj 1031 热浪 最短路
    【bzoj2005】 [Noi2010]能量采集 数学结论(gcd)
    hdu 1394 Minimum Inversion Number 逆序数/树状数组
    HDU 1698 just a hook 线段树,区间定值,求和
    ZeptoLab Code Rush 2015 C. Om Nom and Candies 暴力
    ZeptoLab Code Rush 2015 B. Om Nom and Dark Park DFS
    ZeptoLab Code Rush 2015 A. King of Thieves 暴力
    hdoj 5199 Gunner map
    hdoj 5198 Strange Class 水题
    vijos 1659 河蟹王国 线段树区间加、区间查询最大值
  • 原文地址:https://www.cnblogs.com/chance88/p/6572879.html
Copyright © 2011-2022 走看看